diff --git a/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/config-250831.030550.yaml b/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/config-250831.030550.yaml new file mode 100644 index 0000000000000000000000000000000000000000..955115d9ea04cde2b55fb596ecb39894c984f525 --- /dev/null +++ b/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/config-250831.030550.yaml @@ -0,0 +1,260 @@ +enable_cache: true +cache: + root: runs +output: + root: runs + dirname: skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000 + job: run +pipeline: + name: flux.1-dev-ghibli + path: /home/pingzhi/checkpoints/fused_flux_dev_studio_ghibli_bf16 + dtype: torch.bfloat16 + device: cuda + shift_activations: false + enable_lora: false + task: text-to-image +eval: + protocol: fmeuler50-g3.5 + num_gpus: 1 + batch_size: 1 + batch_size_per_gpu: 1 + height: null + width: null + clean_caption: null + num_steps: 50 + guidance_scale: 3.5 + num_samples: 5000 + benchmarks: + - MJHQ + - DCI + gt_metrics: + - clip_iqa + - clip_score + - image_reward + - fid + ref_metrics: + - psnr + - lpips + - ssim + - fid + gen_root: '{output}/{job}' + ref_root: /data/pingzhi/deepcompressor/examples/diffusion/baselines/torch.bfloat16/flux.1-dev-ghibli/fmeuler50-g3.5 + gt_stats_root: benchmarks/stats + control_root: benchmarks + chunk_start: 0 + chunk_step: 1 + chunk_only: false +quant: + wgts: + dtype: sfp4_e2m1_all + zero_point: null + group_shapes: + - - -1 + - -1 + - -1 + - - 1 + - 16 + - 1 + - 1 + - 1 + scale_dtypes: + - null + - sfp8_e4m3_nan + enable_kernel_gptq: false + enable_low_rank: true + low_rank: + rank: 32 + exclusive: false + compensate: false + degree: 2 + objective: OutputsError + strategy: Manual + sample_batch_size: 16 + sample_size: -1 + outputs_device: cpu + num_iters: 100 + early_stop: true + skips: + - down_resblock_conv + - down_transformer_add_norm + - down_transformer_norm + - mid_resblock_conv + - mid_transformer_add_norm + - mid_transformer_norm + - transformer_add_norm + - transformer_norm + - up_resblock_conv + - up_transformer_add_norm + - up_transformer_norm + enable_calib_range: true + calib_range: + degree: 2 + objective: OutputsError + strategy: Manual + granularity: Layer + element_batch_size: -1 + sample_batch_size: 16 + element_size: -1 + sample_size: -1 + pre_reshape: true + outputs_device: cpu + ratio: 1.0 + max_shrink: 0.2 + max_expand: 1.0 + num_grids: 80 + allow_scale: false + skips: [] + skips: + - down_resblock_shortcut + - down_resblock_time_proj + - down_sample + - down_transformer_proj_in + - down_transformer_proj_out + - input_embed + - mid_resblock_shortcut + - mid_resblock_time_proj + - mid_transformer_proj_in + - mid_transformer_proj_out + - output_embed + - text_embed + - time_embed + - up_resblock_shortcut + - up_resblock_time_proj + - up_sample + - up_transformer_proj_in + - up_transformer_proj_out + ipts: + dtype: sfp4_e2m1_all + zero_point: null + group_shapes: + - - 1 + - 16 + - 1 + - 1 + - 1 + scale_dtypes: + - sfp8_e4m3_nan + static: false + enable_calib_range: false + skips: + - down_resblock_shortcut + - down_resblock_time_proj + - down_sample + - down_transformer_add_norm + - down_transformer_norm + - down_transformer_proj_in + - down_transformer_proj_out + - input_embed + - mid_resblock_shortcut + - mid_resblock_time_proj + - mid_transformer_add_norm + - mid_transformer_norm + - mid_transformer_proj_in + - mid_transformer_proj_out + - output_embed + - text_embed + - time_embed + - transformer_add_norm + - transformer_norm + - up_resblock_shortcut + - up_resblock_time_proj + - up_sample + - up_transformer_add_norm + - up_transformer_norm + - up_transformer_proj_in + - up_transformer_proj_out + allow_unsigned: false + opts: + dtype: null + zero_point: null + group_shapes: + - - -1 + - -1 + - -1 + scale_dtypes: + - null + static: false + enable_calib_range: false + skips: [] + allow_unsigned: false + enable_extra_wgts: true + extra_wgts: + dtype: sint4 + zero_point: null + group_shapes: + - - 1 + - 64 + - 1 + - 1 + - 1 + scale_dtypes: + - null + includes: + - down_transformer_add_norm + - down_transformer_norm + - mid_transformer_add_norm + - mid_transformer_norm + - transformer_add_norm + - transformer_norm + - up_transformer_add_norm + - up_transformer_norm + calib: + data: qdiff + num_samples: 128 + batch_size: 16 + path: /data/pingzhi/deepcompressor/examples/diffusion/datasets/torch.bfloat16/flux.1-dev-ghibli/fmeuler50-g3.5/qdiff/s128 + num_workers: 8 + enable_rotation: false + enable_smooth: true + smooth: + enable_proj: true + proj: + degree: 2 + objective: OutputsError + strategy: GridSearch + granularity: Layer + element_batch_size: -1 + sample_batch_size: 16 + element_size: -1 + sample_size: -1 + pre_reshape: true + outputs_device: cpu + fuse_when_possible: false + allow_a_quant: true + allow_b_quant: true + spans: + - - AbsMax + - AbsMax + alpha: 0.5 + beta: -2 + num_grids: 20 + allow_low_rank: true + skips: + - down_resblock_conv + - down_transformer_add_norm + - down_transformer_norm + - mid_resblock_conv + - mid_transformer_add_norm + - mid_transformer_norm + - transformer_add_norm + - transformer_norm + - up_resblock_conv + - up_transformer_add_norm + - up_transformer_norm + enable_attn: false + develop_dtype: torch.float32 +enable_text: false +text_cache: + root: '' + path: + rotation: '' + reorder: '' + smooth: '' + wgts: '' + acts: '' +seed: 12345 +skip_gen: true +skip_eval: true +load_from: '' +save_model: 'true' +copy_on_save: false diff --git a/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/branch.pt b/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/branch.pt new file mode 120000 index 0000000000000000000000000000000000000000..b6dcf53a5af0c7ed508e047c3820afcb289828cc --- /dev/null +++ b/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/branch.pt @@ -0,0 +1 @@ +../../../../../../../../../cache/quant/qdiff.128/branch/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/w.skip.[e+rs+rtp+s+tpi+tpo]-x.skip.[e+rs+rtp+s+tan+tn+tpi+tpo]-y.skip.[]-w.include.[tan+tn]/lowrank.r32/smooth.proj.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.proj.[a.AbsMax.b.AbsMax]/smooth.proj.g20.bn2.lr.nf/smooth.proj.skip.[rc+tan+tn]/lowrank.OutputsError.Manual.Layer.d2.en1.sn1/lowrank.i100.r32.earlystop/lowrank.skip.[rc+tan+tn]/flux.1-dev-ghibli.pt \ No newline at end of file diff --git a/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/model.pt b/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ec3b83fbfe66a5c3f9d3612cd540606a0776867 --- /dev/null +++ b/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc5cd04b5dbc2542d524d8725ef4db10184f2cb7a68c391a421ea141e96dd3fb +size 23803297253 diff --git a/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/scale.pt b/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/scale.pt new file mode 100644 index 0000000000000000000000000000000000000000..729e76a75c6d0dd0cb0b3d0af43a74971563c75e --- /dev/null +++ b/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/scale.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa6048e14f91974ce019fe63b48320b221b3682f3617e402277def100fdb8782 +size 2353874427 diff --git a/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/smooth.pt b/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/smooth.pt new file mode 120000 index 0000000000000000000000000000000000000000..50e352988a94119d6f984c1398553818b2685222 --- /dev/null +++ b/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/smooth.pt @@ -0,0 +1 @@ +../../../../../../../../../cache/quant/qdiff.128/smooth/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/w.skip.[e+rs+rtp+s+tpi+tpo]-x.skip.[e+rs+rtp+s+tan+tn+tpi+tpo]-y.skip.[]-w.include.[tan+tn]/lowrank.r32/smooth.proj.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.proj.[a.AbsMax.b.AbsMax]/smooth.proj.g20.bn2.lr.nf/smooth.proj.skip.[rc+tan+tn]/flux.1-dev-ghibli.pt \ No newline at end of file diff --git a/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/wgts.pt b/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/wgts.pt new file mode 100644 index 0000000000000000000000000000000000000000..56cbba36ba9af4441ab0c7dfcf6a81e55a4e5eea --- /dev/null +++ b/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/model/wgts.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836f77a150a2e5e4ad6f51b65cb1f93b6bf98a16cdf9ce9cd19652a1e102d3dc +size 39771 diff --git a/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/run-250831.030550.log b/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/run-250831.030550.log new file mode 100644 index 0000000000000000000000000000000000000000..9393522bd5e5c6c2fc4f7573c753d8051cc1c774 --- /dev/null +++ b/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550/run-250831.030550.log @@ -0,0 +1,20362 @@ +25-08-31 03:05:50 | I | === Configurations === +25-08-31 03:05:50 | I | DiffusionPtqRunConfig( +25-08-31 03:05:50 | I | cache=DiffusionPtqCacheConfig( +25-08-31 03:05:50 | I | root=runs, +25-08-31 03:05:50 | I | dirpath=DiffusionQuantCacheConfig(smooth='runs/diffusion/cache/quant/qdiff.128/smooth/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/w.skip.[e+rs+rtp+s+tpi+tpo]-x.skip.[e+rs+rtp+s+tan+tn+tpi+tpo]-y.skip.[]-w.include.[tan+tn]/lowrank.r32/smooth.proj.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.proj.[a.AbsMax.b.AbsMax]/smooth.proj.g20.bn2.lr.nf/smooth.proj.skip.[rc+tan+tn]', branch='runs/diffusion/cache/quant/qdiff.128/branch/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/w.skip.[e+rs+rtp+s+tpi+tpo]-x.skip.[e+rs+rtp+s+tan+tn+tpi+tpo]-y.skip.[]-w.include.[tan+tn]/lowrank.r32/smooth.proj.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.proj.[a.AbsMax.b.AbsMax]/smooth.proj.g20.bn2.lr.nf/smooth.proj.skip.[rc+tan+tn]/lowrank.OutputsError.Manual.Layer.d2.en1.sn1/lowrank.i100.r32.earlystop/lowrank.skip.[rc+tan+tn]', wgts='', acts=''), +25-08-31 03:05:50 | I | path=DiffusionQuantCacheConfig(smooth='runs/diffusion/cache/quant/qdiff.128/smooth/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/w.skip.[e+rs+rtp+s+tpi+tpo]-x.skip.[e+rs+rtp+s+tan+tn+tpi+tpo]-y.skip.[]-w.include.[tan+tn]/lowrank.r32/smooth.proj.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.proj.[a.AbsMax.b.AbsMax]/smooth.proj.g20.bn2.lr.nf/smooth.proj.skip.[rc+tan+tn]/flux.1-dev-ghibli.pt', branch='runs/diffusion/cache/quant/qdiff.128/branch/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/w.skip.[e+rs+rtp+s+tpi+tpo]-x.skip.[e+rs+rtp+s+tan+tn+tpi+tpo]-y.skip.[]-w.include.[tan+tn]/lowrank.r32/smooth.proj.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.proj.[a.AbsMax.b.AbsMax]/smooth.proj.g20.bn2.lr.nf/smooth.proj.skip.[rc+tan+tn]/lowrank.OutputsError.Manual.Layer.d2.en1.sn1/lowrank.i100.r32.earlystop/lowrank.skip.[rc+tan+tn]/flux.1-dev-ghibli.pt', wgts='', acts='')), +25-08-31 03:05:50 | I | output=OutputConfig( +25-08-31 03:05:50 | I | root=runs, +25-08-31 03:05:50 | I | dirname=skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000, +25-08-31 03:05:50 | I | job=run, +25-08-31 03:05:50 | I | dirpath=runs/diffusion/flux.1/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000, +25-08-31 03:05:50 | I | timestamp=250831.030550), +25-08-31 03:05:50 | I | pipeline=DiffusionPipelineConfig( +25-08-31 03:05:50 | I | name=flux.1-dev-ghibli, +25-08-31 03:05:50 | I | path=/home/pingzhi/checkpoints/fused_flux_dev_studio_ghibli_bf16, +25-08-31 03:05:50 | I | dtype=torch.bfloat16, +25-08-31 03:05:50 | I | device=cuda, +25-08-31 03:05:50 | I | shift_activations=False, +25-08-31 03:05:50 | I | lora=None, +25-08-31 03:05:50 | I | family=flux.1, +25-08-31 03:05:50 | I | task=text-to-image), +25-08-31 03:05:50 | I | eval=DiffusionEvalConfig( +25-08-31 03:05:50 | I | protocol=fmeuler50-g3.5, +25-08-31 03:05:50 | I | num_gpus=1, +25-08-31 03:05:50 | I | batch_size=1, +25-08-31 03:05:50 | I | batch_size_per_gpu=1, +25-08-31 03:05:50 | I | height=None, +25-08-31 03:05:50 | I | width=None, +25-08-31 03:05:50 | I | clean_caption=None, +25-08-31 03:05:50 | I | num_steps=50, +25-08-31 03:05:50 | I | guidance_scale=3.5, +25-08-31 03:05:50 | I | num_samples=5000, +25-08-31 03:05:50 | I | benchmarks=['MJHQ', 'DCI'], +25-08-31 03:05:50 | I | gt_metrics=['clip_iqa', 'clip_score', 'image_reward', 'fid'], +25-08-31 03:05:50 | I | ref_metrics=['psnr', 'lpips', 'ssim', 'fid'], +25-08-31 03:05:50 | I | gen_root={output}/{job}, +25-08-31 03:05:50 | I | ref_root=/data/pingzhi/deepcompressor/examples/diffusion/baselines/torch.bfloat16/flux.1-dev-ghibli/fmeuler50-g3.5, +25-08-31 03:05:50 | I | gt_stats_root=benchmarks/stats, +25-08-31 03:05:50 | I | control_root=benchmarks, +25-08-31 03:05:50 | I | chunk_start=0, +25-08-31 03:05:50 | I | chunk_step=1, +25-08-31 03:05:50 | I | chunk_only=False), +25-08-31 03:05:50 | I | quant=DiffusionQuantConfig( +25-08-31 03:05:50 | I | wgts=DiffusionWeightQuantizerConfig( +25-08-31 03:05:50 | I | dtype=sfp4_e2m1_all, +25-08-31 03:05:50 | I | zero_point=None, +25-08-31 03:05:50 | I | group_shapes=((-1, -1, -1), (1, 16, 1, 1, 1)), +25-08-31 03:05:50 | I | scale_dtypes=(None, sfp8_e4m3_nan), +25-08-31 03:05:50 | I | static=True, +25-08-31 03:05:50 | I | kernel_gptq=None, +25-08-31 03:05:50 | I | low_rank=SkipBasedQuantLowRankCalibConfig( +25-08-31 03:05:50 | I | rank=32, +25-08-31 03:05:50 | I | exclusive=False, +25-08-31 03:05:50 | I | compensate=False, +25-08-31 03:05:50 | I | degree=2, +25-08-31 03:05:50 | I | objective=SearchBasedCalibObjective.OutputsError, +25-08-31 03:05:50 | I | strategy=SearchBasedCalibStrategy.Manual, +25-08-31 03:05:50 | I | granularity=SearchBasedCalibGranularity.Layer, +25-08-31 03:05:50 | I | element_batch_size=-1, +25-08-31 03:05:50 | I | sample_batch_size=16, +25-08-31 03:05:50 | I | element_size=-1, +25-08-31 03:05:50 | I | sample_size=-1, +25-08-31 03:05:50 | I | pre_reshape=True, +25-08-31 03:05:50 | I | outputs_device=cpu, +25-08-31 03:05:50 | I | num_iters=100, +25-08-31 03:05:50 | I | early_stop=True, +25-08-31 03:05:50 | I | skips=['down_resblock_conv', 'down_transformer_add_norm', 'down_transformer_norm', 'mid_resblock_conv', 'mid_transformer_add_norm', 'mid_transformer_norm', 'transformer_add_norm', 'transformer_norm', 'up_resblock_conv', 'up_transformer_add_norm', 'up_transformer_norm']), +25-08-31 03:05:50 | I | calib_range=SkipBasedDynamicRangeCalibConfig( +25-08-31 03:05:50 | I | degree=2, +25-08-31 03:05:50 | I | objective=SearchBasedCalibObjective.OutputsError, +25-08-31 03:05:50 | I | strategy=SearchBasedCalibStrategy.Manual, +25-08-31 03:05:50 | I | granularity=SearchBasedCalibGranularity.Layer, +25-08-31 03:05:50 | I | element_batch_size=-1, +25-08-31 03:05:50 | I | sample_batch_size=16, +25-08-31 03:05:50 | I | element_size=-1, +25-08-31 03:05:50 | I | sample_size=-1, +25-08-31 03:05:50 | I | pre_reshape=True, +25-08-31 03:05:50 | I | outputs_device=cpu, +25-08-31 03:05:50 | I | ratio=1.0, +25-08-31 03:05:50 | I | max_shrink=0.2, +25-08-31 03:05:50 | I | max_expand=1.0, +25-08-31 03:05:50 | I | num_grids=80, +25-08-31 03:05:50 | I | allow_scale=False, +25-08-31 03:05:50 | I | skips=[]), +25-08-31 03:05:50 | I | skips=['down_resblock_shortcut', 'down_resblock_time_proj', 'down_sample', 'down_transformer_proj_in', 'down_transformer_proj_out', 'input_embed', 'mid_resblock_shortcut', 'mid_resblock_time_proj', 'mid_transformer_proj_in', 'mid_transformer_proj_out', 'output_embed', 'text_embed', 'time_embed', 'up_resblock_shortcut', 'up_resblock_time_proj', 'up_sample', 'up_transformer_proj_in', 'up_transformer_proj_out']), +25-08-31 03:05:50 | I | ipts=DiffusionActivationQuantizerConfig( +25-08-31 03:05:50 | I | dtype=sfp4_e2m1_all, +25-08-31 03:05:50 | I | zero_point=None, +25-08-31 03:05:50 | I | group_shapes=((1, 16, 1, 1, 1),), +25-08-31 03:05:50 | I | scale_dtypes=(sfp8_e4m3_nan,), +25-08-31 03:05:50 | I | static=False, +25-08-31 03:05:50 | I | kernel_gptq=None, +25-08-31 03:05:50 | I | low_rank=None, +25-08-31 03:05:50 | I | calib_range=None, +25-08-31 03:05:50 | I | skips=['down_resblock_shortcut', 'down_resblock_time_proj', 'down_sample', 'down_transformer_add_norm', 'down_transformer_norm', 'down_transformer_proj_in', 'down_transformer_proj_out', 'input_embed', 'mid_resblock_shortcut', 'mid_resblock_time_proj', 'mid_transformer_add_norm', 'mid_transformer_norm', 'mid_transformer_proj_in', 'mid_transformer_proj_out', 'output_embed', 'text_embed', 'time_embed', 'transformer_add_norm', 'transformer_norm', 'up_resblock_shortcut', 'up_resblock_time_proj', 'up_sample', 'up_transformer_add_norm', 'up_transformer_norm', 'up_transformer_proj_in', 'up_transformer_proj_out'], +25-08-31 03:05:50 | I | allow_unsigned=False), +25-08-31 03:05:50 | I | opts=DiffusionActivationQuantizerConfig( +25-08-31 03:05:50 | I | dtype=None, +25-08-31 03:05:50 | I | zero_point=None, +25-08-31 03:05:50 | I | group_shapes=((-1, -1, -1),), +25-08-31 03:05:50 | I | scale_dtypes=(None,), +25-08-31 03:05:50 | I | static=False, +25-08-31 03:05:50 | I | kernel_gptq=None, +25-08-31 03:05:50 | I | low_rank=None, +25-08-31 03:05:50 | I | calib_range=None, +25-08-31 03:05:50 | I | skips=[], +25-08-31 03:05:50 | I | allow_unsigned=False), +25-08-31 03:05:50 | I | extra_wgts=DiffusionExtraWeightQuantizerConfig( +25-08-31 03:05:50 | I | dtype=sint4, +25-08-31 03:05:50 | I | zero_point=None, +25-08-31 03:05:50 | I | group_shapes=((1, 64, 1, 1, 1),), +25-08-31 03:05:50 | I | scale_dtypes=(None,), +25-08-31 03:05:50 | I | static=True, +25-08-31 03:05:50 | I | kernel_gptq=None, +25-08-31 03:05:50 | I | low_rank=SkipBasedQuantLowRankCalibConfig( +25-08-31 03:05:50 | I | rank=32, +25-08-31 03:05:50 | I | exclusive=False, +25-08-31 03:05:50 | I | compensate=False, +25-08-31 03:05:50 | I | degree=2, +25-08-31 03:05:50 | I | objective=SearchBasedCalibObjective.OutputsError, +25-08-31 03:05:50 | I | strategy=SearchBasedCalibStrategy.Manual, +25-08-31 03:05:50 | I | granularity=SearchBasedCalibGranularity.Layer, +25-08-31 03:05:50 | I | element_batch_size=-1, +25-08-31 03:05:50 | I | sample_batch_size=16, +25-08-31 03:05:50 | I | element_size=-1, +25-08-31 03:05:50 | I | sample_size=-1, +25-08-31 03:05:50 | I | pre_reshape=True, +25-08-31 03:05:50 | I | outputs_device=cpu, +25-08-31 03:05:50 | I | num_iters=100, +25-08-31 03:05:50 | I | early_stop=True, +25-08-31 03:05:50 | I | skips=['down_resblock_conv', 'down_transformer_add_norm', 'down_transformer_norm', 'mid_resblock_conv', 'mid_transformer_add_norm', 'mid_transformer_norm', 'transformer_add_norm', 'transformer_norm', 'up_resblock_conv', 'up_transformer_add_norm', 'up_transformer_norm']), +25-08-31 03:05:50 | I | calib_range=SkipBasedDynamicRangeCalibConfig( +25-08-31 03:05:50 | I | degree=2, +25-08-31 03:05:50 | I | objective=SearchBasedCalibObjective.OutputsError, +25-08-31 03:05:50 | I | strategy=SearchBasedCalibStrategy.Manual, +25-08-31 03:05:50 | I | granularity=SearchBasedCalibGranularity.Layer, +25-08-31 03:05:50 | I | element_batch_size=-1, +25-08-31 03:05:50 | I | sample_batch_size=16, +25-08-31 03:05:50 | I | element_size=-1, +25-08-31 03:05:50 | I | sample_size=-1, +25-08-31 03:05:50 | I | pre_reshape=True, +25-08-31 03:05:50 | I | outputs_device=cpu, +25-08-31 03:05:50 | I | ratio=1.0, +25-08-31 03:05:50 | I | max_shrink=0.2, +25-08-31 03:05:50 | I | max_expand=1.0, +25-08-31 03:05:50 | I | num_grids=80, +25-08-31 03:05:50 | I | allow_scale=False, +25-08-31 03:05:50 | I | skips=[]), +25-08-31 03:05:50 | I | includes=['down_transformer_add_norm', 'down_transformer_norm', 'mid_transformer_add_norm', 'mid_transformer_norm', 'transformer_add_norm', 'transformer_norm', 'up_transformer_add_norm', 'up_transformer_norm']), +25-08-31 03:05:50 | I | unsigned_ipts=DiffusionActivationQuantizerConfig( +25-08-31 03:05:50 | I | dtype=sfp4_e2m1_all, +25-08-31 03:05:50 | I | zero_point=None, +25-08-31 03:05:50 | I | group_shapes=((1, 16, 1, 1, 1),), +25-08-31 03:05:50 | I | scale_dtypes=(sfp8_e4m3_nan,), +25-08-31 03:05:50 | I | static=False, +25-08-31 03:05:50 | I | kernel_gptq=None, +25-08-31 03:05:50 | I | low_rank=None, +25-08-31 03:05:50 | I | calib_range=None, +25-08-31 03:05:50 | I | skips=['down_resblock_shortcut', 'down_resblock_time_proj', 'down_sample', 'down_transformer_add_norm', 'down_transformer_norm', 'down_transformer_proj_in', 'down_transformer_proj_out', 'input_embed', 'mid_resblock_shortcut', 'mid_resblock_time_proj', 'mid_transformer_add_norm', 'mid_transformer_norm', 'mid_transformer_proj_in', 'mid_transformer_proj_out', 'output_embed', 'text_embed', 'time_embed', 'transformer_add_norm', 'transformer_norm', 'up_resblock_shortcut', 'up_resblock_time_proj', 'up_sample', 'up_transformer_add_norm', 'up_transformer_norm', 'up_transformer_proj_in', 'up_transformer_proj_out'], +25-08-31 03:05:50 | I | allow_unsigned=False), +25-08-31 03:05:50 | I | calib=DiffusionCalibCacheLoaderConfig( +25-08-31 03:05:50 | I | data=qdiff, +25-08-31 03:05:50 | I | num_samples=128, +25-08-31 03:05:50 | I | batch_size=16, +25-08-31 03:05:50 | I | path=/data/pingzhi/deepcompressor/examples/diffusion/datasets/torch.bfloat16/flux.1-dev-ghibli/fmeuler50-g3.5/qdiff/s128, +25-08-31 03:05:50 | I | num_workers=8), +25-08-31 03:05:50 | I | rotation=None, +25-08-31 03:05:50 | I | smooth=SmoothTransfomerConfig( +25-08-31 03:05:50 | I | proj=SkipBasedSmoothCalibConfig( +25-08-31 03:05:50 | I | degree=2, +25-08-31 03:05:50 | I | objective=SearchBasedCalibObjective.OutputsError, +25-08-31 03:05:50 | I | strategy=SearchBasedCalibStrategy.GridSearch, +25-08-31 03:05:50 | I | granularity=SearchBasedCalibGranularity.Layer, +25-08-31 03:05:50 | I | element_batch_size=-1, +25-08-31 03:05:50 | I | sample_batch_size=16, +25-08-31 03:05:50 | I | element_size=-1, +25-08-31 03:05:50 | I | sample_size=-1, +25-08-31 03:05:50 | I | pre_reshape=True, +25-08-31 03:05:50 | I | outputs_device=cpu, +25-08-31 03:05:50 | I | fuse_when_possible=False, +25-08-31 03:05:50 | I | allow_a_quant=True, +25-08-31 03:05:50 | I | allow_b_quant=True, +25-08-31 03:05:50 | I | spans=[(, )], +25-08-31 03:05:50 | I | a_spans=[], +25-08-31 03:05:50 | I | b_spans=[], +25-08-31 03:05:50 | I | alpha=0.5, +25-08-31 03:05:50 | I | beta=-2, +25-08-31 03:05:50 | I | num_grids=20, +25-08-31 03:05:50 | I | allow_low_rank=True, +25-08-31 03:05:50 | I | skips=['down_resblock_conv', 'down_transformer_add_norm', 'down_transformer_norm', 'mid_resblock_conv', 'mid_transformer_add_norm', 'mid_transformer_norm', 'transformer_add_norm', 'transformer_norm', 'up_resblock_conv', 'up_transformer_add_norm', 'up_transformer_norm']), +25-08-31 03:05:50 | I | attn=None), +25-08-31 03:05:50 | I | develop_dtype=torch.float32), +25-08-31 03:05:50 | I | text=None, +25-08-31 03:05:50 | I | text_cache=LlmCacheConfig( +25-08-31 03:05:50 | I | root=, +25-08-31 03:05:50 | I | dirpath=LlmQuantCacheConfig( +25-08-31 03:05:50 | I | rotation=, +25-08-31 03:05:50 | I | reorder=, +25-08-31 03:05:50 | I | smooth=, +25-08-31 03:05:50 | I | wgts=, +25-08-31 03:05:50 | I | acts=), +25-08-31 03:05:50 | I | path=LlmQuantCacheConfig( +25-08-31 03:05:50 | I | rotation=, +25-08-31 03:05:50 | I | reorder=, +25-08-31 03:05:50 | I | smooth=, +25-08-31 03:05:50 | I | wgts=, +25-08-31 03:05:50 | I | acts=)), +25-08-31 03:05:50 | I | seed=12345, +25-08-31 03:05:50 | I | skip_gen=True, +25-08-31 03:05:50 | I | skip_eval=True, +25-08-31 03:05:50 | I | load_from=, +25-08-31 03:05:50 | I | save_model=true, +25-08-31 03:05:50 | I | copy_on_save=False) +25-08-31 03:05:50 | I | === Dumped Configurations === +25-08-31 03:05:50 | I | { 'cache': {'root': 'runs'}, +25-08-31 03:05:50 | I | 'copy_on_save': False, +25-08-31 03:05:50 | I | 'enable_cache': True, +25-08-31 03:05:50 | I | 'enable_text': False, +25-08-31 03:05:50 | I | 'eval': { 'batch_size': 1, +25-08-31 03:05:50 | I | 'batch_size_per_gpu': 1, +25-08-31 03:05:50 | I | 'benchmarks': ['MJHQ', 'DCI'], +25-08-31 03:05:50 | I | 'chunk_only': False, +25-08-31 03:05:50 | I | 'chunk_start': 0, +25-08-31 03:05:50 | I | 'chunk_step': 1, +25-08-31 03:05:50 | I | 'clean_caption': None, +25-08-31 03:05:50 | I | 'control_root': 'benchmarks', +25-08-31 03:05:50 | I | 'gen_root': '{output}/{job}', +25-08-31 03:05:50 | I | 'gt_metrics': ['clip_iqa', 'clip_score', 'image_reward', 'fid'], +25-08-31 03:05:50 | I | 'gt_stats_root': 'benchmarks/stats', +25-08-31 03:05:50 | I | 'guidance_scale': 3.5, +25-08-31 03:05:50 | I | 'height': None, +25-08-31 03:05:50 | I | 'num_gpus': 1, +25-08-31 03:05:50 | I | 'num_samples': 5000, +25-08-31 03:05:50 | I | 'num_steps': 50, +25-08-31 03:05:50 | I | 'protocol': 'fmeuler50-g3.5', +25-08-31 03:05:50 | I | 'ref_metrics': ['psnr', 'lpips', 'ssim', 'fid'], +25-08-31 03:05:50 | I | 'ref_root': '/data/pingzhi/deepcompressor/examples/diffusion/baselines/torch.bfloat16/flux.1-dev-ghibli/fmeuler50-g3.5', +25-08-31 03:05:50 | I | 'width': None}, +25-08-31 03:05:50 | I | 'load_from': '', +25-08-31 03:05:50 | I | 'output': { 'dirname': 'skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000', +25-08-31 03:05:50 | I | 'job': 'run', +25-08-31 03:05:50 | I | 'root': 'runs'}, +25-08-31 03:05:50 | I | 'pipeline': { 'device': 'cuda', +25-08-31 03:05:50 | I | 'dtype': 'torch.bfloat16', +25-08-31 03:05:50 | I | 'enable_lora': False, +25-08-31 03:05:50 | I | 'name': 'flux.1-dev-ghibli', +25-08-31 03:05:50 | I | 'path': '/home/pingzhi/checkpoints/fused_flux_dev_studio_ghibli_bf16', +25-08-31 03:05:50 | I | 'shift_activations': False, +25-08-31 03:05:50 | I | 'task': 'text-to-image'}, +25-08-31 03:05:50 | I | 'quant': { 'calib': { 'batch_size': 16, +25-08-31 03:05:50 | I | 'data': 'qdiff', +25-08-31 03:05:50 | I | 'num_samples': 128, +25-08-31 03:05:50 | I | 'num_workers': 8, +25-08-31 03:05:50 | I | 'path': '/data/pingzhi/deepcompressor/examples/diffusion/datasets/torch.bfloat16/flux.1-dev-ghibli/fmeuler50-g3.5/qdiff/s128'}, +25-08-31 03:05:50 | I | 'develop_dtype': 'torch.float32', +25-08-31 03:05:50 | I | 'enable_extra_wgts': True, +25-08-31 03:05:50 | I | 'enable_rotation': False, +25-08-31 03:05:50 | I | 'enable_smooth': True, +25-08-31 03:05:50 | I | 'extra_wgts': { 'dtype': 'sint4', +25-08-31 03:05:50 | I | 'group_shapes': [[1, 64, 1, 1, 1]], +25-08-31 03:05:50 | I | 'includes': [ 'down_transformer_add_norm', +25-08-31 03:05:50 | I | 'down_transformer_norm', +25-08-31 03:05:50 | I | 'mid_transformer_add_norm', +25-08-31 03:05:50 | I | 'mid_transformer_norm', +25-08-31 03:05:50 | I | 'transformer_add_norm', +25-08-31 03:05:50 | I | 'transformer_norm', +25-08-31 03:05:50 | I | 'up_transformer_add_norm', +25-08-31 03:05:50 | I | 'up_transformer_norm'], +25-08-31 03:05:50 | I | 'scale_dtypes': [None], +25-08-31 03:05:50 | I | 'zero_point': None}, +25-08-31 03:05:50 | I | 'ipts': { 'allow_unsigned': False, +25-08-31 03:05:50 | I | 'dtype': 'sfp4_e2m1_all', +25-08-31 03:05:50 | I | 'enable_calib_range': False, +25-08-31 03:05:50 | I | 'group_shapes': [[1, 16, 1, 1, 1]], +25-08-31 03:05:50 | I | 'scale_dtypes': ['sfp8_e4m3_nan'], +25-08-31 03:05:50 | I | 'skips': [ 'down_resblock_shortcut', +25-08-31 03:05:50 | I | 'down_resblock_time_proj', +25-08-31 03:05:50 | I | 'down_sample', +25-08-31 03:05:50 | I | 'down_transformer_add_norm', +25-08-31 03:05:50 | I | 'down_transformer_norm', +25-08-31 03:05:50 | I | 'down_transformer_proj_in', +25-08-31 03:05:50 | I | 'down_transformer_proj_out', +25-08-31 03:05:50 | I | 'input_embed', +25-08-31 03:05:50 | I | 'mid_resblock_shortcut', +25-08-31 03:05:50 | I | 'mid_resblock_time_proj', +25-08-31 03:05:50 | I | 'mid_transformer_add_norm', +25-08-31 03:05:50 | I | 'mid_transformer_norm', +25-08-31 03:05:50 | I | 'mid_transformer_proj_in', +25-08-31 03:05:50 | I | 'mid_transformer_proj_out', +25-08-31 03:05:50 | I | 'output_embed', +25-08-31 03:05:50 | I | 'text_embed', +25-08-31 03:05:50 | I | 'time_embed', +25-08-31 03:05:50 | I | 'transformer_add_norm', +25-08-31 03:05:50 | I | 'transformer_norm', +25-08-31 03:05:50 | I | 'up_resblock_shortcut', +25-08-31 03:05:50 | I | 'up_resblock_time_proj', +25-08-31 03:05:50 | I | 'up_sample', +25-08-31 03:05:50 | I | 'up_transformer_add_norm', +25-08-31 03:05:50 | I | 'up_transformer_norm', +25-08-31 03:05:50 | I | 'up_transformer_proj_in', +25-08-31 03:05:50 | I | 'up_transformer_proj_out'], +25-08-31 03:05:50 | I | 'static': False, +25-08-31 03:05:50 | I | 'zero_point': None}, +25-08-31 03:05:50 | I | 'opts': { 'allow_unsigned': False, +25-08-31 03:05:50 | I | 'dtype': None, +25-08-31 03:05:50 | I | 'enable_calib_range': False, +25-08-31 03:05:50 | I | 'group_shapes': [[-1, -1, -1]], +25-08-31 03:05:50 | I | 'scale_dtypes': [None], +25-08-31 03:05:50 | I | 'skips': [], +25-08-31 03:05:50 | I | 'static': False, +25-08-31 03:05:50 | I | 'zero_point': None}, +25-08-31 03:05:50 | I | 'smooth': { 'enable_attn': False, +25-08-31 03:05:50 | I | 'enable_proj': True, +25-08-31 03:05:50 | I | 'proj': { 'allow_a_quant': True, +25-08-31 03:05:50 | I | 'allow_b_quant': True, +25-08-31 03:05:50 | I | 'allow_low_rank': True, +25-08-31 03:05:50 | I | 'alpha': 0.5, +25-08-31 03:05:50 | I | 'beta': -2, +25-08-31 03:05:50 | I | 'degree': 2, +25-08-31 03:05:50 | I | 'element_batch_size': -1, +25-08-31 03:05:50 | I | 'element_size': -1, +25-08-31 03:05:50 | I | 'fuse_when_possible': False, +25-08-31 03:05:50 | I | 'granularity': 'Layer', +25-08-31 03:05:50 | I | 'num_grids': 20, +25-08-31 03:05:50 | I | 'objective': 'OutputsError', +25-08-31 03:05:50 | I | 'outputs_device': 'cpu', +25-08-31 03:05:50 | I | 'pre_reshape': True, +25-08-31 03:05:50 | I | 'sample_batch_size': 16, +25-08-31 03:05:50 | I | 'sample_size': -1, +25-08-31 03:05:50 | I | 'skips': [ 'down_resblock_conv', +25-08-31 03:05:50 | I | 'down_transformer_add_norm', +25-08-31 03:05:50 | I | 'down_transformer_norm', +25-08-31 03:05:50 | I | 'mid_resblock_conv', +25-08-31 03:05:50 | I | 'mid_transformer_add_norm', +25-08-31 03:05:50 | I | 'mid_transformer_norm', +25-08-31 03:05:50 | I | 'transformer_add_norm', +25-08-31 03:05:50 | I | 'transformer_norm', +25-08-31 03:05:50 | I | 'up_resblock_conv', +25-08-31 03:05:50 | I | 'up_transformer_add_norm', +25-08-31 03:05:50 | I | 'up_transformer_norm'], +25-08-31 03:05:50 | I | 'spans': [['AbsMax', 'AbsMax']], +25-08-31 03:05:50 | I | 'strategy': 'GridSearch'}}, +25-08-31 03:05:50 | I | 'wgts': { 'calib_range': { 'allow_scale': False, +25-08-31 03:05:50 | I | 'degree': 2, +25-08-31 03:05:50 | I | 'element_batch_size': -1, +25-08-31 03:05:50 | I | 'element_size': -1, +25-08-31 03:05:50 | I | 'granularity': 'Layer', +25-08-31 03:05:50 | I | 'max_expand': 1.0, +25-08-31 03:05:50 | I | 'max_shrink': 0.2, +25-08-31 03:05:50 | I | 'num_grids': 80, +25-08-31 03:05:50 | I | 'objective': 'OutputsError', +25-08-31 03:05:50 | I | 'outputs_device': 'cpu', +25-08-31 03:05:50 | I | 'pre_reshape': True, +25-08-31 03:05:50 | I | 'ratio': 1.0, +25-08-31 03:05:50 | I | 'sample_batch_size': 16, +25-08-31 03:05:50 | I | 'sample_size': -1, +25-08-31 03:05:50 | I | 'skips': [], +25-08-31 03:05:50 | I | 'strategy': 'Manual'}, +25-08-31 03:05:50 | I | 'dtype': 'sfp4_e2m1_all', +25-08-31 03:05:50 | I | 'enable_calib_range': True, +25-08-31 03:05:50 | I | 'enable_kernel_gptq': False, +25-08-31 03:05:50 | I | 'enable_low_rank': True, +25-08-31 03:05:50 | I | 'group_shapes': [[-1, -1, -1], [1, 16, 1, 1, 1]], +25-08-31 03:05:50 | I | 'low_rank': { 'compensate': False, +25-08-31 03:05:50 | I | 'degree': 2, +25-08-31 03:05:50 | I | 'early_stop': True, +25-08-31 03:05:50 | I | 'exclusive': False, +25-08-31 03:05:50 | I | 'num_iters': 100, +25-08-31 03:05:50 | I | 'objective': 'OutputsError', +25-08-31 03:05:50 | I | 'outputs_device': 'cpu', +25-08-31 03:05:50 | I | 'rank': 32, +25-08-31 03:05:50 | I | 'sample_batch_size': 16, +25-08-31 03:05:50 | I | 'sample_size': -1, +25-08-31 03:05:50 | I | 'skips': [ 'down_resblock_conv', +25-08-31 03:05:50 | I | 'down_transformer_add_norm', +25-08-31 03:05:50 | I | 'down_transformer_norm', +25-08-31 03:05:50 | I | 'mid_resblock_conv', +25-08-31 03:05:50 | I | 'mid_transformer_add_norm', +25-08-31 03:05:50 | I | 'mid_transformer_norm', +25-08-31 03:05:50 | I | 'transformer_add_norm', +25-08-31 03:05:50 | I | 'transformer_norm', +25-08-31 03:05:50 | I | 'up_resblock_conv', +25-08-31 03:05:50 | I | 'up_transformer_add_norm', +25-08-31 03:05:50 | I | 'up_transformer_norm'], +25-08-31 03:05:50 | I | 'strategy': 'Manual'}, +25-08-31 03:05:50 | I | 'scale_dtypes': [None, 'sfp8_e4m3_nan'], +25-08-31 03:05:50 | I | 'skips': [ 'down_resblock_shortcut', +25-08-31 03:05:50 | I | 'down_resblock_time_proj', +25-08-31 03:05:50 | I | 'down_sample', +25-08-31 03:05:50 | I | 'down_transformer_proj_in', +25-08-31 03:05:50 | I | 'down_transformer_proj_out', +25-08-31 03:05:50 | I | 'input_embed', +25-08-31 03:05:50 | I | 'mid_resblock_shortcut', +25-08-31 03:05:50 | I | 'mid_resblock_time_proj', +25-08-31 03:05:50 | I | 'mid_transformer_proj_in', +25-08-31 03:05:50 | I | 'mid_transformer_proj_out', +25-08-31 03:05:50 | I | 'output_embed', +25-08-31 03:05:50 | I | 'text_embed', +25-08-31 03:05:50 | I | 'time_embed', +25-08-31 03:05:50 | I | 'up_resblock_shortcut', +25-08-31 03:05:50 | I | 'up_resblock_time_proj', +25-08-31 03:05:50 | I | 'up_sample', +25-08-31 03:05:50 | I | 'up_transformer_proj_in', +25-08-31 03:05:50 | I | 'up_transformer_proj_out'], +25-08-31 03:05:50 | I | 'zero_point': None}}, +25-08-31 03:05:50 | I | 'save_model': 'true', +25-08-31 03:05:50 | I | 'seed': 12345, +25-08-31 03:05:50 | I | 'skip_eval': True, +25-08-31 03:05:50 | I | 'skip_gen': True, +25-08-31 03:05:50 | I | 'text_cache': {'path': {'acts': '', 'reorder': '', 'rotation': '', 'smooth': '', 'wgts': ''}, 'root': ''}} +25-08-31 03:05:50 | I | === Output Directory === +25-08-31 03:05:50 | I | runs/diffusion/flux.1/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250831.030550 +25-08-31 03:05:50 | I | === Start Evaluating === +25-08-31 03:05:50 | I | * Building diffusion model pipeline +25-08-31 03:06:38 | I | Replacing fused Linear with ConcatLinear. +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.0 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.1 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.2 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.3 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.4 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.5 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.6 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.7 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.8 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.9 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.10 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.11 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.12 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.13 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.14 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.15 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.16 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.17 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.18 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.19 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.20 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.21 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.22 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.23 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.24 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.25 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.26 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.27 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.28 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.29 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.30 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.31 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.32 with ConcatLinear. +25-08-31 03:06:38 | I | - in_features = 3072/15360 +25-08-31 03:06:38 | I | - out_features = 3072 +25-08-31 03:06:38 | I | + Replacing fused Linear in single_transformer_blocks.33 with ConcatLinear. +25-08-31 03:06:39 | I | - in_features = 3072/15360 +25-08-31 03:06:39 | I | - out_features = 3072 +25-08-31 03:06:39 | I | + Replacing fused Linear in single_transformer_blocks.34 with ConcatLinear. +25-08-31 03:06:39 | I | - in_features = 3072/15360 +25-08-31 03:06:39 | I | - out_features = 3072 +25-08-31 03:06:39 | I | + Replacing fused Linear in single_transformer_blocks.35 with ConcatLinear. +25-08-31 03:06:39 | I | - in_features = 3072/15360 +25-08-31 03:06:39 | I | - out_features = 3072 +25-08-31 03:06:39 | I | + Replacing fused Linear in single_transformer_blocks.36 with ConcatLinear. +25-08-31 03:06:39 | I | - in_features = 3072/15360 +25-08-31 03:06:39 | I | - out_features = 3072 +25-08-31 03:06:39 | I | + Replacing fused Linear in single_transformer_blocks.37 with ConcatLinear. +25-08-31 03:06:39 | I | - in_features = 3072/15360 +25-08-31 03:06:39 | I | - out_features = 3072 +25-08-31 03:06:39 | I | * Smoothing model for quantization +25-08-31 03:06:39 | I | - Generating smooth scales +25-08-31 03:07:40 | D | - Smoothing Diffusion Block transformer_blocks.0 +25-08-31 03:07:40 | D | - Skipping Module transformer_blocks.0.norm1.linear +25-08-31 03:07:40 | D | - Skipping Module transformer_blocks.0.norm1_context.linear +25-08-31 03:07:40 | D | - Smoothing Transformer Block transformer_blocks.0 +25-08-31 03:07:40 | D | - transformer_blocks.0.attn.qkv_proj +25-08-31 03:07:40 | D | + w: sfp4_e2m1_all +25-08-31 03:07:40 | D | + x: sfp4_e2m1_all +25-08-31 03:07:40 | D | + y: None +25-08-31 03:07:40 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:07:40 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-31 03:07:40 | D | + x - AbsMax +25-08-31 03:07:40 | D | + x = [min=0.0962, max=17.8750] +25-08-31 03:07:40 | D | + w - AbsMax +25-08-31 03:07:40 | D | + w = [min=0.1064, max=2.4375] +25-08-31 03:07:40 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-31 03:07:42 | D | + finished calculating the original outputs, ram usage: 13.1 +25-08-31 03:08:58 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:08:58 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:08:58 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:08:58 | D | - sum error = [ 356.5586, 311.8923, 302.6056, 284.1847, 271.3865] +25-08-31 03:08:58 | D | - best error = [ 356.5586, 311.8923, 302.6056, 284.1847, 271.3865] +25-08-31 03:08:58 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:08:58 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:08:58 | D | - sum error = [ 252.4110, 250.0218, 246.8364, 250.0298, 235.0854] +25-08-31 03:08:58 | D | - best error = [ 252.4110, 250.0218, 246.8364, 246.8364, 235.0854] +25-08-31 03:08:58 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:08:58 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:08:58 | D | - sum error = [ 241.9111, 256.2963, 251.5036, 255.4668, 278.7960] +25-08-31 03:08:58 | D | - best error = [ 235.0854, 235.0854, 235.0854, 235.0854, 235.0854] +25-08-31 03:08:58 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:08:58 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:08:58 | D | - sum error = [ 291.7685, 315.8543, 343.0017, 383.7878, 421.7303] +25-08-31 03:08:58 | D | - best error = [ 235.0854, 235.0854, 235.0854, 235.0854, 235.0854] +25-08-31 03:08:58 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:08:58 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:08:58 | D | - sum error = [ 3103.8388, 2350.9204, 1595.1840, 1158.3004, 997.8141] +25-08-31 03:08:58 | D | - best error = [ 235.0854, 235.0854, 235.0854, 235.0854, 235.0854] +25-08-31 03:08:58 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:08:58 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:08:58 | D | - sum error = [ 861.8808, 813.3426, 705.5574, 616.7377, 543.1175] +25-08-31 03:08:58 | D | - best error = [ 235.0854, 235.0854, 235.0854, 235.0854, 235.0854] +25-08-31 03:08:58 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:08:58 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:08:58 | D | - sum error = [ 488.0511, 450.1110, 431.2394, 406.8612, 382.7191] +25-08-31 03:08:58 | D | - best error = [ 235.0854, 235.0854, 235.0854, 235.0854, 235.0854] +25-08-31 03:08:58 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:08:58 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:08:58 | D | - sum error = [ 382.5196, 396.8664, 411.0264, 436.6440] +25-08-31 03:08:58 | D | - best error = [ 235.0854, 235.0854, 235.0854, 235.0854] +25-08-31 03:08:58 | D | + error = 235.0854 +25-08-31 03:08:58 | D | + scale = [min=0.3487, max=3.6602] +25-08-31 03:08:58 | D | - transformer_blocks.0.attn add_qkv_proj +25-08-31 03:08:58 | D | + w: sfp4_e2m1_all +25-08-31 03:08:58 | D | + x: sfp4_e2m1_all +25-08-31 03:08:58 | D | + y: None +25-08-31 03:08:58 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:08:58 | D | + finished parsing calibration arguments, ram usage: 13.1 +25-08-31 03:08:58 | D | + x - AbsMax +25-08-31 03:08:58 | D | + x = [min=0.1777, max=18.3750] +25-08-31 03:08:58 | D | + w - AbsMax +25-08-31 03:08:58 | D | + w = [min=0.1182, max=0.5391] +25-08-31 03:08:58 | D | + finished resetting calibrator, ram usage: 13.1 +25-08-31 03:08:59 | D | + finished calculating the original outputs, ram usage: 13.2 +25-08-31 03:10:07 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:10:07 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:10:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:10:07 | D | - sum error = [ 438.8179, 424.2305, 419.1998, 421.6046, 401.8551] +25-08-31 03:10:07 | D | - best error = [ 438.8179, 424.2305, 419.1998, 419.1998, 401.8551] +25-08-31 03:10:07 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:10:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:10:07 | D | - sum error = [ 406.2308, 391.4600, 397.7796, 396.7752, 396.8558] +25-08-31 03:10:07 | D | - best error = [ 401.8551, 391.4600, 391.4600, 391.4600, 391.4600] +25-08-31 03:10:07 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:10:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:10:07 | D | - sum error = [ 396.2114, 396.0340, 387.9034, 387.2409, 388.7500] +25-08-31 03:10:07 | D | - best error = [ 391.4600, 391.4600, 387.9034, 387.2409, 387.2409] +25-08-31 03:10:07 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:10:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:10:07 | D | - sum error = [ 392.8233, 386.3419, 398.6804, 398.7908, 395.6270] +25-08-31 03:10:07 | D | - best error = [ 387.2409, 386.3419, 386.3419, 386.3419, 386.3419] +25-08-31 03:10:07 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:10:07 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:10:07 | D | - sum error = [ 558.0533, 519.7266, 507.3171, 472.6481, 469.8401] +25-08-31 03:10:07 | D | - best error = [ 386.3419, 386.3419, 386.3419, 386.3419, 386.3419] +25-08-31 03:10:07 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:10:07 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:10:07 | D | - sum error = [ 457.6208, 447.0441, 443.7885, 419.3147, 408.6699] +25-08-31 03:10:07 | D | - best error = [ 386.3419, 386.3419, 386.3419, 386.3419, 386.3419] +25-08-31 03:10:07 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:10:07 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:10:07 | D | - sum error = [ 412.1766, 392.6485, 399.3585, 407.6044, 405.6344] +25-08-31 03:10:07 | D | - best error = [ 386.3419, 386.3419, 386.3419, 386.3419, 386.3419] +25-08-31 03:10:07 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:10:07 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:10:07 | D | - sum error = [ 403.0923, 403.1493, 399.4857, 402.8885] +25-08-31 03:10:07 | D | - best error = [ 386.3419, 386.3419, 386.3419, 386.3419] +25-08-31 03:10:07 | D | + error = 386.3419 +25-08-31 03:10:07 | D | + scale = [min=0.2511, max=10.2655] +25-08-31 03:10:07 | D | - transformer_blocks.0.attn.out_proj + transformer_blocks.0.attn.add_out_proj +25-08-31 03:10:07 | D | + w: sfp4_e2m1_all +25-08-31 03:10:07 | D | + x: sfp4_e2m1_all +25-08-31 03:10:07 | D | + y: None +25-08-31 03:10:07 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:10:07 | D | + finished parsing calibration arguments, ram usage: 13.2 +25-08-31 03:10:08 | D | + x - AbsMax +25-08-31 03:10:08 | D | + x = [min=0.0562, max=1.6016] +25-08-31 03:10:08 | D | + w - AbsMax +25-08-31 03:10:08 | D | + w = [min=0.0986, max=0.4082] +25-08-31 03:10:08 | D | + finished resetting calibrator, ram usage: 13.2 +25-08-31 03:10:09 | D | + finished calculating the original outputs, ram usage: 13.2 +25-08-31 03:12:02 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:12:02 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:12:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:12:02 | D | - sum error = [ 1077.7829, 1048.9037, 1026.9693, 1001.8803, 993.0884] +25-08-31 03:12:02 | D | - best error = [ 1077.7829, 1048.9037, 1026.9693, 1001.8803, 993.0884] +25-08-31 03:12:02 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:12:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:12:02 | D | - sum error = [ 982.6774, 971.1181, 957.5537, 957.4575, 962.2749] +25-08-31 03:12:02 | D | - best error = [ 982.6774, 971.1181, 957.5537, 957.4575, 957.4575] +25-08-31 03:12:02 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:12:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:12:02 | D | - sum error = [ 956.9225, 951.8840, 958.8165, 964.9766, 971.8170] +25-08-31 03:12:02 | D | - best error = [ 956.9225, 951.8840, 951.8840, 951.8840, 951.8840] +25-08-31 03:12:02 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:12:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:12:02 | D | - sum error = [ 979.9018, 992.2067, 1009.5479, 1028.3555, 1051.4858] +25-08-31 03:12:02 | D | - best error = [ 951.8840, 951.8840, 951.8840, 951.8840, 951.8840] +25-08-31 03:12:02 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:12:02 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:12:02 | D | - sum error = [ 1217.5061, 1153.2582, 1108.2400, 1072.6781, 1037.4184] +25-08-31 03:12:02 | D | - best error = [ 951.8840, 951.8840, 951.8840, 951.8840, 951.8840] +25-08-31 03:12:02 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:12:02 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:12:02 | D | - sum error = [ 1013.8959, 995.6185, 981.8762, 972.4867, 966.7779] +25-08-31 03:12:02 | D | - best error = [ 951.8840, 951.8840, 951.8840, 951.8840, 951.8840] +25-08-31 03:12:02 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:12:02 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:12:02 | D | - sum error = [ 963.0847, 960.1530, 959.6817, 971.0732, 977.8662] +25-08-31 03:12:02 | D | - best error = [ 951.8840, 951.8840, 951.8840, 951.8840, 951.8840] +25-08-31 03:12:02 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:12:02 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:12:02 | D | - sum error = [ 985.2279, 996.9863, 1019.7615, 1046.5867] +25-08-31 03:12:02 | D | - best error = [ 951.8840, 951.8840, 951.8840, 951.8840] +25-08-31 03:12:02 | D | + error = 951.8840 +25-08-31 03:12:02 | D | + scale = [min=0.2052, max=1.2957] +25-08-31 03:12:02 | D | - transformer_blocks.0.ff.up_proj +25-08-31 03:12:02 | D | + w: sfp4_e2m1_all +25-08-31 03:12:02 | D | + x: sfp4_e2m1_all +25-08-31 03:12:02 | D | + y: None +25-08-31 03:12:02 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:12:02 | D | + finished parsing calibration arguments, ram usage: 13.2 +25-08-31 03:12:03 | D | + x - AbsMax +25-08-31 03:12:03 | D | + x = [min=0.0310, max=5.2500] +25-08-31 03:12:03 | D | + w - AbsMax +25-08-31 03:12:03 | D | + w = [min=0.0306, max=0.4395] +25-08-31 03:12:03 | D | + finished resetting calibrator, ram usage: 13.2 +25-08-31 03:12:09 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 03:13:28 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:13:28 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:13:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:13:28 | D | - sum error = [ 990.3964, 956.2081, 925.0955, 900.1734, 881.7526] +25-08-31 03:13:28 | D | - best error = [ 990.3964, 956.2081, 925.0955, 900.1734, 881.7526] +25-08-31 03:13:28 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:13:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:13:28 | D | - sum error = [ 863.5772, 853.2254, 834.5646, 826.6077, 817.8731] +25-08-31 03:13:28 | D | - best error = [ 863.5772, 853.2254, 834.5646, 826.6077, 817.8731] +25-08-31 03:13:28 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:13:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:13:28 | D | - sum error = [ 818.3932, 815.0753, 815.7369, 822.2446, 829.5510] +25-08-31 03:13:28 | D | - best error = [ 817.8731, 815.0753, 815.0753, 815.0753, 815.0753] +25-08-31 03:13:28 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:13:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:13:28 | D | - sum error = [ 844.7710, 863.9969, 888.1652, 919.4378, 956.2447] +25-08-31 03:13:28 | D | - best error = [ 815.0753, 815.0753, 815.0753, 815.0753, 815.0753] +25-08-31 03:13:28 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:13:28 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:13:28 | D | - sum error = [ 1481.5726, 1344.4732, 1233.2846, 1136.0150, 1055.0498] +25-08-31 03:13:28 | D | - best error = [ 815.0753, 815.0753, 815.0753, 815.0753, 815.0753] +25-08-31 03:13:28 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:13:28 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:13:28 | D | - sum error = [ 989.6116, 944.3936, 904.6745, 872.3317, 849.6454] +25-08-31 03:13:28 | D | - best error = [ 815.0753, 815.0753, 815.0753, 815.0753, 815.0753] +25-08-31 03:13:28 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:13:28 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:13:28 | D | - sum error = [ 833.2267, 818.2618, 813.7103, 812.6330, 818.0230] +25-08-31 03:13:28 | D | - best error = [ 815.0753, 815.0753, 813.7103, 812.6330, 812.6330] +25-08-31 03:13:28 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:13:28 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:13:28 | D | - sum error = [ 834.8145, 857.7323, 891.3022, 938.9424] +25-08-31 03:13:28 | D | - best error = [ 812.6330, 812.6330, 812.6330, 812.6330] +25-08-31 03:13:28 | D | + error = 812.6330 +25-08-31 03:13:28 | D | + scale = [min=0.2292, max=5.7264] +25-08-31 03:13:28 | D | - transformer_blocks.0.ff.down_proj +25-08-31 03:13:28 | D | + w: sfp4_e2m1_all +25-08-31 03:13:28 | D | + x: sfp4_e2m1_all +25-08-31 03:13:28 | D | + y: None +25-08-31 03:13:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:13:28 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 03:13:31 | D | + x - AbsMax +25-08-31 03:13:31 | D | + x = [min=0.0947, max=7.2500] +25-08-31 03:13:31 | D | + w - AbsMax +25-08-31 03:13:31 | D | + w = [min=0.0571, max=0.6055] +25-08-31 03:13:31 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 03:13:33 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 03:15:32 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:15:32 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:15:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:15:32 | D | - sum error = [ 1301.8256, 1286.3798, 1255.8895, 1256.1244, 1248.2023] +25-08-31 03:15:32 | D | - best error = [ 1301.8256, 1286.3798, 1255.8895, 1255.8895, 1248.2023] +25-08-31 03:15:32 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:15:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:15:32 | D | - sum error = [ 1247.7457, 1234.7772, 1263.6775, 1241.5742, 1281.7544] +25-08-31 03:15:32 | D | - best error = [ 1247.7457, 1234.7772, 1234.7772, 1234.7772, 1234.7772] +25-08-31 03:15:32 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:15:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:15:32 | D | - sum error = [ 1275.5264, 1285.5812, 1307.2545, 1304.1637, 1323.2497] +25-08-31 03:15:32 | D | - best error = [ 1234.7772, 1234.7772, 1234.7772, 1234.7772, 1234.7772] +25-08-31 03:15:32 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:15:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:15:32 | D | - sum error = [ 1329.9543, 1370.7710, 1387.5001, 1418.0314, 1463.2161] +25-08-31 03:15:32 | D | - best error = [ 1234.7772, 1234.7772, 1234.7772, 1234.7772, 1234.7772] +25-08-31 03:15:32 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:15:32 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:15:32 | D | - sum error = [ 1767.6140, 1591.2811, 1459.1478, 1481.4505, 1483.5511] +25-08-31 03:15:32 | D | - best error = [ 1234.7772, 1234.7772, 1234.7772, 1234.7772, 1234.7772] +25-08-31 03:15:32 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:15:32 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:15:32 | D | - sum error = [ 1361.5443, 1318.8424, 1333.7596, 1295.1121, 1289.1706] +25-08-31 03:15:32 | D | - best error = [ 1234.7772, 1234.7772, 1234.7772, 1234.7772, 1234.7772] +25-08-31 03:15:32 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:15:32 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:15:32 | D | - sum error = [ 1276.9972, 1271.2265, 1297.0895, 1310.0238, 1322.4592] +25-08-31 03:15:32 | D | - best error = [ 1234.7772, 1234.7772, 1234.7772, 1234.7772, 1234.7772] +25-08-31 03:15:32 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:15:32 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:15:32 | D | - sum error = [ 1339.2857, 1373.4176, 1389.1769, 1442.8688] +25-08-31 03:15:32 | D | - best error = [ 1234.7772, 1234.7772, 1234.7772, 1234.7772] +25-08-31 03:15:32 | D | + error = 1234.7772 +25-08-31 03:15:32 | D | + scale = [min=0.4931, max=1.8118] +25-08-31 03:15:32 | D | - transformer_blocks.0.ff_context.up_proj +25-08-31 03:15:32 | D | + w: sfp4_e2m1_all +25-08-31 03:15:32 | D | + x: sfp4_e2m1_all +25-08-31 03:15:32 | D | + y: None +25-08-31 03:15:32 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:15:32 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 03:15:32 | D | + x - AbsMax +25-08-31 03:15:32 | D | + x = [min=0.1562, max=11.8750] +25-08-31 03:15:32 | D | + w - AbsMax +25-08-31 03:15:32 | D | + w = [min=0.1162, max=0.4180] +25-08-31 03:15:32 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 03:15:33 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 03:16:17 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:16:17 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:16:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:16:17 | D | - sum error = [ 3678.3761, 3632.8876, 3578.5841, 3560.8478, 3519.8099] +25-08-31 03:16:17 | D | - best error = [ 3678.3761, 3632.8876, 3578.5841, 3560.8478, 3519.8099] +25-08-31 03:16:17 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:16:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:16:17 | D | - sum error = [ 3485.4630, 3442.0816, 3425.6375, 3402.7862, 3375.0239] +25-08-31 03:16:17 | D | - best error = [ 3485.4630, 3442.0816, 3425.6375, 3402.7862, 3375.0239] +25-08-31 03:16:17 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:16:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:16:17 | D | - sum error = [ 3371.3175, 3357.3904, 3359.6394, 3345.9486, 3362.4497] +25-08-31 03:16:17 | D | - best error = [ 3371.3175, 3357.3904, 3357.3904, 3345.9486, 3345.9486] +25-08-31 03:16:17 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:16:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:16:17 | D | - sum error = [ 3361.2236, 3381.2674, 3382.8501, 3413.7728, 3434.6109] +25-08-31 03:16:17 | D | - best error = [ 3345.9486, 3345.9486, 3345.9486, 3345.9486, 3345.9486] +25-08-31 03:16:17 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:16:17 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:16:17 | D | - sum error = [ 3709.1177, 3656.1778, 3605.2187, 3571.1425, 3535.5898] +25-08-31 03:16:17 | D | - best error = [ 3345.9486, 3345.9486, 3345.9486, 3345.9486, 3345.9486] +25-08-31 03:16:17 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:16:17 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:16:17 | D | - sum error = [ 3469.6689, 3464.1498, 3424.7228, 3405.8682, 3364.3545] +25-08-31 03:16:17 | D | - best error = [ 3345.9486, 3345.9486, 3345.9486, 3345.9486, 3345.9486] +25-08-31 03:16:17 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:16:17 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:16:17 | D | - sum error = [ 3352.0756, 3359.9213, 3340.9415, 3333.5202, 3349.1660] +25-08-31 03:16:17 | D | - best error = [ 3345.9486, 3345.9486, 3340.9415, 3333.5202, 3333.5202] +25-08-31 03:16:17 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:16:17 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:16:17 | D | - sum error = [ 3349.9687, 3379.0056, 3402.0845, 3431.4380] +25-08-31 03:16:17 | D | - best error = [ 3333.5202, 3333.5202, 3333.5202, 3333.5202] +25-08-31 03:16:17 | D | + error = 3333.5202 +25-08-31 03:16:17 | D | + scale = [min=0.4912, max=8.8823] +25-08-31 03:16:17 | D | - transformer_blocks.0.ff_context.down_proj +25-08-31 03:16:17 | D | + w: sfp4_e2m1_all +25-08-31 03:16:17 | D | + x: sfp4_e2m1_all +25-08-31 03:16:17 | D | + y: None +25-08-31 03:16:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:16:17 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 03:16:17 | D | + x - AbsMax +25-08-31 03:16:17 | D | + x = [min=0.0000, max=50.2500] +25-08-31 03:16:17 | D | + w - AbsMax +25-08-31 03:16:17 | D | + w = [min=0.0723, max=0.5039] +25-08-31 03:16:17 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 03:16:18 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 03:17:05 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:17:05 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:17:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:17:05 | D | - sum error = [ 1672.5772, 1631.4109, 1594.6307, 1596.4179, 1548.2970] +25-08-31 03:17:05 | D | - best error = [ 1672.5772, 1631.4109, 1594.6307, 1594.6307, 1548.2970] +25-08-31 03:17:05 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:17:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:17:05 | D | - sum error = [ 1533.3057, 1502.5385, 1481.6600, 1453.1603, 1472.0560] +25-08-31 03:17:05 | D | - best error = [ 1533.3057, 1502.5385, 1481.6600, 1453.1603, 1453.1603] +25-08-31 03:17:05 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:17:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:17:05 | D | - sum error = [ 1441.0392, 1404.9777, 1408.2782, 1386.3219, 1378.2844] +25-08-31 03:17:05 | D | - best error = [ 1441.0392, 1404.9777, 1404.9777, 1386.3219, 1378.2844] +25-08-31 03:17:05 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:17:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:17:05 | D | - sum error = [ 1368.1583, 1365.6122, 1372.0031, 1367.7325, 1367.9778] +25-08-31 03:17:05 | D | - best error = [ 1368.1583, 1365.6122, 1365.6122, 1365.6122, 1365.6122] +25-08-31 03:17:05 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:17:05 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:17:05 | D | - sum error = [ 1769.9873, 1745.8353, 1709.0518, 1676.6435, 1640.7605] +25-08-31 03:17:05 | D | - best error = [ 1365.6122, 1365.6122, 1365.6122, 1365.6122, 1365.6122] +25-08-31 03:17:05 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:17:05 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:17:05 | D | - sum error = [ 1620.4093, 1564.3746, 1545.3456, 1508.7193, 1493.3720] +25-08-31 03:17:05 | D | - best error = [ 1365.6122, 1365.6122, 1365.6122, 1365.6122, 1365.6122] +25-08-31 03:17:05 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:17:05 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:17:05 | D | - sum error = [ 1481.6520, 1449.3646, 1439.2359, 1424.4055, 1399.8118] +25-08-31 03:17:05 | D | - best error = [ 1365.6122, 1365.6122, 1365.6122, 1365.6122, 1365.6122] +25-08-31 03:17:05 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:17:05 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:17:05 | D | - sum error = [ 1399.9937, 1377.5583, 1382.6268, 1374.5310] +25-08-31 03:17:05 | D | - best error = [ 1365.6122, 1365.6122, 1365.6122, 1365.6122] +25-08-31 03:17:05 | D | + error = 1365.6122 +25-08-31 03:17:05 | D | + scale = [min=0.0000, max=22.9567] +25-08-31 03:17:24 | D | - Smoothing Diffusion Block transformer_blocks.1 +25-08-31 03:17:24 | D | - Skipping Module transformer_blocks.1.norm1.linear +25-08-31 03:17:24 | D | - Skipping Module transformer_blocks.1.norm1_context.linear +25-08-31 03:17:24 | D | - Smoothing Transformer Block transformer_blocks.1 +25-08-31 03:17:24 | D | - transformer_blocks.1.attn.qkv_proj +25-08-31 03:17:24 | D | + w: sfp4_e2m1_all +25-08-31 03:17:24 | D | + x: sfp4_e2m1_all +25-08-31 03:17:24 | D | + y: None +25-08-31 03:17:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:17:24 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 03:17:25 | D | + x - AbsMax +25-08-31 03:17:25 | D | + x = [min=0.0437, max=19.7500] +25-08-31 03:17:25 | D | + w - AbsMax +25-08-31 03:17:25 | D | + w = [min=0.0923, max=1.0000] +25-08-31 03:17:25 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 03:17:26 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 03:18:41 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:18:41 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:18:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:18:41 | D | - sum error = [ 1039.0571, 910.9609, 806.7593, 742.7651, 700.9683] +25-08-31 03:18:41 | D | - best error = [ 1039.0571, 910.9609, 806.7593, 742.7651, 700.9683] +25-08-31 03:18:41 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:18:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:18:41 | D | - sum error = [ 666.7921, 668.7378, 631.6583, 636.2095, 659.4668] +25-08-31 03:18:41 | D | - best error = [ 666.7921, 666.7921, 631.6583, 631.6583, 631.6583] +25-08-31 03:18:41 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:18:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:18:41 | D | - sum error = [ 690.6208, 704.2334, 712.7111, 754.4236, 793.8013] +25-08-31 03:18:41 | D | - best error = [ 631.6583, 631.6583, 631.6583, 631.6583, 631.6583] +25-08-31 03:18:41 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:18:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:18:41 | D | - sum error = [ 892.3331, 1021.9028, 1106.1805, 1210.2129, 1353.5286] +25-08-31 03:18:41 | D | - best error = [ 631.6583, 631.6583, 631.6583, 631.6583, 631.6583] +25-08-31 03:18:41 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:18:41 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:18:41 | D | - sum error = [ 1850.7533, 1707.4344, 1546.1469, 1421.7393, 1304.8403] +25-08-31 03:18:41 | D | - best error = [ 631.6583, 631.6583, 631.6583, 631.6583, 631.6583] +25-08-31 03:18:41 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:18:41 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:18:41 | D | - sum error = [ 1171.9676, 1133.2660, 1054.5938, 977.4663, 976.9852] +25-08-31 03:18:41 | D | - best error = [ 631.6583, 631.6583, 631.6583, 631.6583, 631.6583] +25-08-31 03:18:41 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:18:41 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:18:41 | D | - sum error = [ 946.9222, 918.0418, 927.0069, 955.4850, 979.8971] +25-08-31 03:18:41 | D | - best error = [ 631.6583, 631.6583, 631.6583, 631.6583, 631.6583] +25-08-31 03:18:41 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:18:41 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:18:41 | D | - sum error = [ 1071.1394, 1171.1505, 1204.7059, 1377.9531] +25-08-31 03:18:41 | D | - best error = [ 631.6583, 631.6583, 631.6583, 631.6583] +25-08-31 03:18:41 | D | + error = 631.6583 +25-08-31 03:18:41 | D | + scale = [min=0.3343, max=2.8409] +25-08-31 03:18:41 | D | - transformer_blocks.1.attn add_qkv_proj +25-08-31 03:18:41 | D | + w: sfp4_e2m1_all +25-08-31 03:18:41 | D | + x: sfp4_e2m1_all +25-08-31 03:18:41 | D | + y: None +25-08-31 03:18:41 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:18:41 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 03:18:41 | D | + x - AbsMax +25-08-31 03:18:41 | D | + x = [min=0.1035, max=39.2500] +25-08-31 03:18:41 | D | + w - AbsMax +25-08-31 03:18:41 | D | + w = [min=0.1167, max=0.3828] +25-08-31 03:18:41 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 03:18:42 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 03:19:46 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:19:46 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:19:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:19:46 | D | - sum error = [ 853.0336, 813.6862, 798.6293, 763.8513, 742.9359] +25-08-31 03:19:46 | D | - best error = [ 853.0336, 813.6862, 798.6293, 763.8513, 742.9359] +25-08-31 03:19:46 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:19:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:19:46 | D | - sum error = [ 792.9866, 758.0762, 767.5272, 711.9927, 691.9890] +25-08-31 03:19:46 | D | - best error = [ 742.9359, 742.9359, 742.9359, 711.9927, 691.9890] +25-08-31 03:19:46 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:19:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:19:46 | D | - sum error = [ 752.1123, 702.9672, 708.3039, 715.3037, 752.7271] +25-08-31 03:19:46 | D | - best error = [ 691.9890, 691.9890, 691.9890, 691.9890, 691.9890] +25-08-31 03:19:46 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:19:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:19:46 | D | - sum error = [ 715.4618, 795.9769, 789.9687, 822.4223, 857.2500] +25-08-31 03:19:46 | D | - best error = [ 691.9890, 691.9890, 691.9890, 691.9890, 691.9890] +25-08-31 03:19:46 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:19:46 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:19:46 | D | - sum error = [ 1179.1532, 1089.1167, 999.9468, 901.5057, 865.2015] +25-08-31 03:19:46 | D | - best error = [ 691.9890, 691.9890, 691.9890, 691.9890, 691.9890] +25-08-31 03:19:46 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:19:46 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:19:46 | D | - sum error = [ 840.1831, 810.0735, 773.2344, 797.6105, 831.7405] +25-08-31 03:19:46 | D | - best error = [ 691.9890, 691.9890, 691.9890, 691.9890, 691.9890] +25-08-31 03:19:46 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:19:46 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:19:46 | D | - sum error = [ 830.3164, 755.7052, 740.7227, 739.6959, 728.2568] +25-08-31 03:19:46 | D | - best error = [ 691.9890, 691.9890, 691.9890, 691.9890, 691.9890] +25-08-31 03:19:46 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:19:46 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:19:46 | D | - sum error = [ 742.6032, 757.0208, 813.7505, 888.6485] +25-08-31 03:19:46 | D | - best error = [ 691.9890, 691.9890, 691.9890, 691.9890] +25-08-31 03:19:46 | D | + error = 691.9890 +25-08-31 03:19:46 | D | + scale = [min=0.3604, max=5.2147] +25-08-31 03:19:46 | D | - transformer_blocks.1.attn.out_proj + transformer_blocks.1.attn.add_out_proj +25-08-31 03:19:46 | D | + w: sfp4_e2m1_all +25-08-31 03:19:46 | D | + x: sfp4_e2m1_all +25-08-31 03:19:46 | D | + y: None +25-08-31 03:19:46 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:19:46 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 03:19:47 | D | + x - AbsMax +25-08-31 03:19:47 | D | + x = [min=0.1748, max=7.1875] +25-08-31 03:19:47 | D | + w - AbsMax +25-08-31 03:19:47 | D | + w = [min=0.1064, max=0.4746] +25-08-31 03:19:47 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 03:19:48 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 03:21:35 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:21:35 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:21:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:21:35 | D | - sum error = [ 2542.8828, 2510.8274, 2471.3835, 2436.1724, 2412.0069] +25-08-31 03:21:35 | D | - best error = [ 2542.8828, 2510.8274, 2471.3835, 2436.1724, 2412.0069] +25-08-31 03:21:35 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:21:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:21:35 | D | - sum error = [ 2400.3435, 2366.8735, 2364.8562, 2355.6481, 2351.3316] +25-08-31 03:21:35 | D | - best error = [ 2400.3435, 2366.8735, 2364.8562, 2355.6481, 2351.3316] +25-08-31 03:21:35 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:21:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:21:35 | D | - sum error = [ 2360.6418, 2378.5437, 2377.0501, 2381.8422, 2413.1091] +25-08-31 03:21:35 | D | - best error = [ 2351.3316, 2351.3316, 2351.3316, 2351.3316, 2351.3316] +25-08-31 03:21:35 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:21:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:21:35 | D | - sum error = [ 2427.2470, 2430.5594, 2471.9026, 2521.7611, 2575.2184] +25-08-31 03:21:35 | D | - best error = [ 2351.3316, 2351.3316, 2351.3316, 2351.3316, 2351.3316] +25-08-31 03:21:35 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:21:35 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:21:35 | D | - sum error = [ 2637.9793, 2597.9543, 2572.6262, 2512.0361, 2484.6846] +25-08-31 03:21:35 | D | - best error = [ 2351.3316, 2351.3316, 2351.3316, 2351.3316, 2351.3316] +25-08-31 03:21:35 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:21:35 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:21:35 | D | - sum error = [ 2451.9264, 2426.0088, 2394.0658, 2400.2897, 2385.9706] +25-08-31 03:21:35 | D | - best error = [ 2351.3316, 2351.3316, 2351.3316, 2351.3316, 2351.3316] +25-08-31 03:21:35 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:21:35 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:21:35 | D | - sum error = [ 2380.8576, 2387.2125, 2398.7338, 2408.3756, 2436.9920] +25-08-31 03:21:35 | D | - best error = [ 2351.3316, 2351.3316, 2351.3316, 2351.3316, 2351.3316] +25-08-31 03:21:35 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:21:35 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:21:35 | D | - sum error = [ 2470.3453, 2475.5167, 2510.2939, 2559.1753] +25-08-31 03:21:35 | D | - best error = [ 2351.3316, 2351.3316, 2351.3316, 2351.3316] +25-08-31 03:21:35 | D | + error = 2351.3316 +25-08-31 03:21:35 | D | + scale = [min=0.4562, max=2.4292] +25-08-31 03:21:36 | D | - transformer_blocks.1.ff.up_proj +25-08-31 03:21:36 | D | + w: sfp4_e2m1_all +25-08-31 03:21:36 | D | + x: sfp4_e2m1_all +25-08-31 03:21:36 | D | + y: None +25-08-31 03:21:36 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:21:36 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 03:21:36 | D | + x - AbsMax +25-08-31 03:21:36 | D | + x = [min=0.0186, max=12.0625] +25-08-31 03:21:36 | D | + w - AbsMax +25-08-31 03:21:36 | D | + w = [min=0.0309, max=0.4727] +25-08-31 03:21:36 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 03:21:37 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 03:22:52 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:22:52 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:22:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:22:52 | D | - sum error = [ 1243.4679, 1203.3214, 1168.5217, 1136.1849, 1112.3327] +25-08-31 03:22:52 | D | - best error = [ 1243.4679, 1203.3214, 1168.5217, 1136.1849, 1112.3327] +25-08-31 03:22:52 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:22:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:22:52 | D | - sum error = [ 1088.3545, 1070.5271, 1056.3540, 1044.9641, 1038.2665] +25-08-31 03:22:52 | D | - best error = [ 1088.3545, 1070.5271, 1056.3540, 1044.9641, 1038.2665] +25-08-31 03:22:52 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:22:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:22:52 | D | - sum error = [ 1034.2524, 1034.5392, 1035.3827, 1042.5725, 1053.8019] +25-08-31 03:22:52 | D | - best error = [ 1034.2524, 1034.2524, 1034.2524, 1034.2524, 1034.2524] +25-08-31 03:22:52 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:22:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:22:52 | D | - sum error = [ 1069.8767, 1096.2680, 1131.2205, 1181.0824, 1244.5646] +25-08-31 03:22:52 | D | - best error = [ 1034.2524, 1034.2524, 1034.2524, 1034.2524, 1034.2524] +25-08-31 03:22:52 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:22:52 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:22:52 | D | - sum error = [ 1761.4676, 1612.3095, 1497.5595, 1402.8427, 1328.4952] +25-08-31 03:22:52 | D | - best error = [ 1034.2524, 1034.2524, 1034.2524, 1034.2524, 1034.2524] +25-08-31 03:22:52 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:22:52 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:22:52 | D | - sum error = [ 1270.3064, 1210.7022, 1161.2755, 1124.1599, 1090.7408] +25-08-31 03:22:52 | D | - best error = [ 1034.2524, 1034.2524, 1034.2524, 1034.2524, 1034.2524] +25-08-31 03:22:52 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:22:52 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:22:52 | D | - sum error = [ 1073.4015, 1059.7320, 1050.6395, 1051.1406, 1057.3388] +25-08-31 03:22:52 | D | - best error = [ 1034.2524, 1034.2524, 1034.2524, 1034.2524, 1034.2524] +25-08-31 03:22:52 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:22:52 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:22:52 | D | - sum error = [ 1075.7134, 1109.3959, 1158.9790, 1227.7805] +25-08-31 03:22:52 | D | - best error = [ 1034.2524, 1034.2524, 1034.2524, 1034.2524] +25-08-31 03:22:52 | D | + error = 1034.2524 +25-08-31 03:22:52 | D | + scale = [min=0.1362, max=3.4731] +25-08-31 03:22:52 | D | - transformer_blocks.1.ff.down_proj +25-08-31 03:22:52 | D | + w: sfp4_e2m1_all +25-08-31 03:22:52 | D | + x: sfp4_e2m1_all +25-08-31 03:22:52 | D | + y: None +25-08-31 03:22:52 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:22:52 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 03:22:55 | D | + x - AbsMax +25-08-31 03:22:55 | D | + x = [min=0.1699, max=9.8750] +25-08-31 03:22:55 | D | + w - AbsMax +25-08-31 03:22:55 | D | + w = [min=0.0776, max=0.5977] +25-08-31 03:22:55 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 03:22:57 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 03:24:46 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:24:46 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:24:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:24:46 | D | - sum error = [ 2186.9864, 2192.4302, 2186.6537, 2181.5374, 2187.8526] +25-08-31 03:24:46 | D | - best error = [ 2186.9864, 2186.9864, 2186.6537, 2181.5374, 2181.5374] +25-08-31 03:24:46 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:24:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:24:46 | D | - sum error = [ 2203.5450, 2195.2379, 2199.5093, 2188.0322, 2200.7955] +25-08-31 03:24:46 | D | - best error = [ 2181.5374, 2181.5374, 2181.5374, 2181.5374, 2181.5374] +25-08-31 03:24:46 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:24:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:24:46 | D | - sum error = [ 2214.2307, 2202.5701, 2210.7392, 2198.4804, 2217.8222] +25-08-31 03:24:46 | D | - best error = [ 2181.5374, 2181.5374, 2181.5374, 2181.5374, 2181.5374] +25-08-31 03:24:46 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:24:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:24:46 | D | - sum error = [ 2194.3615, 2219.8575, 2236.2656, 2235.5608, 2298.9472] +25-08-31 03:24:46 | D | - best error = [ 2181.5374, 2181.5374, 2181.5374, 2181.5374, 2181.5374] +25-08-31 03:24:46 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:24:46 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:24:46 | D | - sum error = [ 2351.7677, 2328.3287, 2323.1667, 2271.6582, 2260.7503] +25-08-31 03:24:46 | D | - best error = [ 2181.5374, 2181.5374, 2181.5374, 2181.5374, 2181.5374] +25-08-31 03:24:46 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:24:46 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:24:46 | D | - sum error = [ 2269.1087, 2247.0696, 2244.2511, 2264.3857, 2224.3815] +25-08-31 03:24:46 | D | - best error = [ 2181.5374, 2181.5374, 2181.5374, 2181.5374, 2181.5374] +25-08-31 03:24:46 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:24:46 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:24:46 | D | - sum error = [ 2214.5793, 2210.6189, 2233.9102, 2204.0715, 2219.9984] +25-08-31 03:24:46 | D | - best error = [ 2181.5374, 2181.5374, 2181.5374, 2181.5374, 2181.5374] +25-08-31 03:24:46 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:24:46 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:24:46 | D | - sum error = [ 2232.7348, 2242.7514, 2254.3496, 2267.5944] +25-08-31 03:24:46 | D | - best error = [ 2181.5374, 2181.5374, 2181.5374, 2181.5374] +25-08-31 03:24:46 | D | + error = 2181.5374 +25-08-31 03:24:46 | D | + scale = [min=0.7665, max=1.4099] +25-08-31 03:24:46 | D | - transformer_blocks.1.ff_context.up_proj +25-08-31 03:24:46 | D | + w: sfp4_e2m1_all +25-08-31 03:24:46 | D | + x: sfp4_e2m1_all +25-08-31 03:24:46 | D | + y: None +25-08-31 03:24:46 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:24:46 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 03:24:46 | D | + x - AbsMax +25-08-31 03:24:46 | D | + x = [min=0.1338, max=68.5000] +25-08-31 03:24:46 | D | + w - AbsMax +25-08-31 03:24:46 | D | + w = [min=0.1069, max=0.4043] +25-08-31 03:24:46 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 03:24:47 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 03:25:30 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:25:30 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:25:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:25:30 | D | - sum error = [ 3801.6027, 3665.6128, 3508.6857, 3380.5090, 3277.5238] +25-08-31 03:25:30 | D | - best error = [ 3801.6027, 3665.6128, 3508.6857, 3380.5090, 3277.5238] +25-08-31 03:25:30 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:25:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:25:30 | D | - sum error = [ 3172.0892, 3089.1819, 3023.7156, 2955.0929, 2902.5887] +25-08-31 03:25:30 | D | - best error = [ 3172.0892, 3089.1819, 3023.7156, 2955.0929, 2902.5887] +25-08-31 03:25:30 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:25:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:25:30 | D | - sum error = [ 2845.9754, 2805.6029, 2805.4644, 2772.2118, 2758.3885] +25-08-31 03:25:30 | D | - best error = [ 2845.9754, 2805.6029, 2805.4644, 2772.2118, 2758.3885] +25-08-31 03:25:30 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:25:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:25:30 | D | - sum error = [ 2765.4731, 2787.7036, 2827.5410, 2858.4093, 2900.4420] +25-08-31 03:25:30 | D | - best error = [ 2758.3885, 2758.3885, 2758.3885, 2758.3885, 2758.3885] +25-08-31 03:25:30 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:25:30 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:25:30 | D | - sum error = [ 3794.1877, 3615.2765, 3464.5616, 3364.9389, 3199.0771] +25-08-31 03:25:30 | D | - best error = [ 2758.3885, 2758.3885, 2758.3885, 2758.3885, 2758.3885] +25-08-31 03:25:30 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:25:30 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:25:30 | D | - sum error = [ 3145.2976, 3058.5820, 2967.9067, 2914.5988, 2875.7616] +25-08-31 03:25:30 | D | - best error = [ 2758.3885, 2758.3885, 2758.3885, 2758.3885, 2758.3885] +25-08-31 03:25:30 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:25:30 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:25:30 | D | - sum error = [ 2833.8136, 2801.7943, 2791.5985, 2798.1565, 2792.0484] +25-08-31 03:25:30 | D | - best error = [ 2758.3885, 2758.3885, 2758.3885, 2758.3885, 2758.3885] +25-08-31 03:25:30 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:25:30 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:25:30 | D | - sum error = [ 2797.4407, 2812.6905, 2858.5112, 2881.6432] +25-08-31 03:25:30 | D | - best error = [ 2758.3885, 2758.3885, 2758.3885, 2758.3885] +25-08-31 03:25:30 | D | + error = 2758.3885 +25-08-31 03:25:30 | D | + scale = [min=0.2446, max=19.2745] +25-08-31 03:25:30 | D | - transformer_blocks.1.ff_context.down_proj +25-08-31 03:25:30 | D | + w: sfp4_e2m1_all +25-08-31 03:25:30 | D | + x: sfp4_e2m1_all +25-08-31 03:25:30 | D | + y: None +25-08-31 03:25:30 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:25:30 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 03:25:30 | D | + x - AbsMax +25-08-31 03:25:30 | D | + x = [min=0.0000, max=86.5000] +25-08-31 03:25:30 | D | + w - AbsMax +25-08-31 03:25:30 | D | + w = [min=0.0295, max=0.7656] +25-08-31 03:25:30 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 03:25:31 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 03:26:19 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:26:19 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:26:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:26:19 | D | - sum error = [ 2463.3502, 2267.2630, 2142.2948, 2077.2606, 2036.6622] +25-08-31 03:26:19 | D | - best error = [ 2463.3502, 2267.2630, 2142.2948, 2077.2606, 2036.6622] +25-08-31 03:26:19 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:26:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:26:19 | D | - sum error = [ 1977.4862, 1863.4051, 1835.3205, 1790.5689, 1747.6702] +25-08-31 03:26:19 | D | - best error = [ 1977.4862, 1863.4051, 1835.3205, 1790.5689, 1747.6702] +25-08-31 03:26:19 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:26:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:26:19 | D | - sum error = [ 1671.4234, 1635.9184, 1616.7019, 1589.8509, 1586.2097] +25-08-31 03:26:19 | D | - best error = [ 1671.4234, 1635.9184, 1616.7019, 1589.8509, 1586.2097] +25-08-31 03:26:19 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:26:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:26:19 | D | - sum error = [ 1574.5734, 1518.6357, 1503.8380, 1501.5151, 1506.4846] +25-08-31 03:26:19 | D | - best error = [ 1574.5734, 1518.6357, 1503.8380, 1501.5151, 1501.5151] +25-08-31 03:26:19 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:26:19 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:26:19 | D | - sum error = [ 2920.2776, 2643.4578, 2438.4503, 2247.4403, 2165.6946] +25-08-31 03:26:19 | D | - best error = [ 1501.5151, 1501.5151, 1501.5151, 1501.5151, 1501.5151] +25-08-31 03:26:19 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:26:19 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:26:19 | D | - sum error = [ 2090.6677, 1999.8440, 1920.3848, 1877.9871, 1817.5340] +25-08-31 03:26:19 | D | - best error = [ 1501.5151, 1501.5151, 1501.5151, 1501.5151, 1501.5151] +25-08-31 03:26:19 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:26:19 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:26:19 | D | - sum error = [ 1757.7659, 1703.8135, 1672.0403, 1644.7562, 1579.6187] +25-08-31 03:26:19 | D | - best error = [ 1501.5151, 1501.5151, 1501.5151, 1501.5151, 1501.5151] +25-08-31 03:26:19 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:26:19 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:26:19 | D | - sum error = [ 1556.7609, 1517.1091, 1492.3733, 1508.1684] +25-08-31 03:26:19 | D | - best error = [ 1501.5151, 1501.5151, 1492.3733, 1492.3733] +25-08-31 03:26:19 | D | + error = 1492.3733 +25-08-31 03:26:19 | D | + scale = [min=0.0000, max=64.1834] +25-08-31 03:26:38 | D | - Smoothing Diffusion Block transformer_blocks.2 +25-08-31 03:26:38 | D | - Skipping Module transformer_blocks.2.norm1.linear +25-08-31 03:26:38 | D | - Skipping Module transformer_blocks.2.norm1_context.linear +25-08-31 03:26:38 | D | - Smoothing Transformer Block transformer_blocks.2 +25-08-31 03:26:38 | D | - transformer_blocks.2.attn.qkv_proj +25-08-31 03:26:38 | D | + w: sfp4_e2m1_all +25-08-31 03:26:38 | D | + x: sfp4_e2m1_all +25-08-31 03:26:38 | D | + y: None +25-08-31 03:26:38 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:26:38 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 03:26:39 | D | + x - AbsMax +25-08-31 03:26:39 | D | + x = [min=0.0201, max=11.4375] +25-08-31 03:26:39 | D | + w - AbsMax +25-08-31 03:26:39 | D | + w = [min=0.0923, max=0.9727] +25-08-31 03:26:39 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:26:40 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:27:54 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:27:54 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:27:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:27:54 | D | - sum error = [ 1509.1827, 1341.9314, 1257.7683, 1255.9262, 1158.0855] +25-08-31 03:27:54 | D | - best error = [ 1509.1827, 1341.9314, 1257.7683, 1255.9262, 1158.0855] +25-08-31 03:27:54 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:27:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:27:54 | D | - sum error = [ 1133.2200, 1123.7535, 1132.3154, 1126.7653, 1174.6503] +25-08-31 03:27:54 | D | - best error = [ 1133.2200, 1123.7535, 1123.7535, 1123.7535, 1123.7535] +25-08-31 03:27:54 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:27:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:27:54 | D | - sum error = [ 1249.1209, 1265.7314, 1344.4371, 1458.2922, 1624.9372] +25-08-31 03:27:54 | D | - best error = [ 1123.7535, 1123.7535, 1123.7535, 1123.7535, 1123.7535] +25-08-31 03:27:54 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:27:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:27:54 | D | - sum error = [ 1794.6714, 1983.0828, 2102.0156, 2314.5060, 2511.9689] +25-08-31 03:27:54 | D | - best error = [ 1123.7535, 1123.7535, 1123.7535, 1123.7535, 1123.7535] +25-08-31 03:27:54 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:27:54 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:27:54 | D | - sum error = [ 2900.1896, 2506.5264, 2348.7562, 2130.8597, 2065.5135] +25-08-31 03:27:54 | D | - best error = [ 1123.7535, 1123.7535, 1123.7535, 1123.7535, 1123.7535] +25-08-31 03:27:54 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:27:54 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:27:54 | D | - sum error = [ 1917.2109, 1816.9424, 1723.4839, 1573.2048, 1620.0741] +25-08-31 03:27:54 | D | - best error = [ 1123.7535, 1123.7535, 1123.7535, 1123.7535, 1123.7535] +25-08-31 03:27:54 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:27:54 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:27:54 | D | - sum error = [ 1612.4987, 1531.9716, 1669.3302, 1743.2473, 1881.4762] +25-08-31 03:27:54 | D | - best error = [ 1123.7535, 1123.7535, 1123.7535, 1123.7535, 1123.7535] +25-08-31 03:27:54 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:27:54 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:27:54 | D | - sum error = [ 2039.9716, 2266.6786, 2288.3777, 2521.9375] +25-08-31 03:27:54 | D | - best error = [ 1123.7535, 1123.7535, 1123.7535, 1123.7535] +25-08-31 03:27:54 | D | + error = 1123.7535 +25-08-31 03:27:54 | D | + scale = [min=0.3099, max=2.0773] +25-08-31 03:27:54 | D | - transformer_blocks.2.attn add_qkv_proj +25-08-31 03:27:54 | D | + w: sfp4_e2m1_all +25-08-31 03:27:54 | D | + x: sfp4_e2m1_all +25-08-31 03:27:54 | D | + y: None +25-08-31 03:27:54 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:27:54 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 03:27:55 | D | + x - AbsMax +25-08-31 03:27:55 | D | + x = [min=0.0464, max=34.2500] +25-08-31 03:27:55 | D | + w - AbsMax +25-08-31 03:27:55 | D | + w = [min=0.1064, max=0.6797] +25-08-31 03:27:55 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:27:56 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:28:59 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:28:59 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:28:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:28:59 | D | - sum error = [ 671.2869, 571.9364, 511.4663, 498.3865, 464.6773] +25-08-31 03:28:59 | D | - best error = [ 671.2869, 571.9364, 511.4663, 498.3865, 464.6773] +25-08-31 03:28:59 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:28:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:28:59 | D | - sum error = [ 461.6431, 444.3494, 431.7618, 388.1408, 387.3063] +25-08-31 03:28:59 | D | - best error = [ 461.6431, 444.3494, 431.7618, 388.1408, 387.3063] +25-08-31 03:28:59 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:28:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:28:59 | D | - sum error = [ 393.4840, 377.5462, 374.6377, 426.3998, 386.7618] +25-08-31 03:28:59 | D | - best error = [ 387.3063, 377.5462, 374.6377, 374.6377, 374.6377] +25-08-31 03:28:59 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:28:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:28:59 | D | - sum error = [ 432.6090, 449.5177, 439.7915, 479.1333, 519.0977] +25-08-31 03:28:59 | D | - best error = [ 374.6377, 374.6377, 374.6377, 374.6377, 374.6377] +25-08-31 03:28:59 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:28:59 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:28:59 | D | - sum error = [ 972.9866, 964.0234, 784.4617, 742.4854, 637.1297] +25-08-31 03:28:59 | D | - best error = [ 374.6377, 374.6377, 374.6377, 374.6377, 374.6377] +25-08-31 03:28:59 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:28:59 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:28:59 | D | - sum error = [ 616.7108, 577.9005, 560.8786, 495.2751, 474.1094] +25-08-31 03:28:59 | D | - best error = [ 374.6377, 374.6377, 374.6377, 374.6377, 374.6377] +25-08-31 03:28:59 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:28:59 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:28:59 | D | - sum error = [ 445.4886, 436.8357, 437.0443, 426.9521, 433.5165] +25-08-31 03:28:59 | D | - best error = [ 374.6377, 374.6377, 374.6377, 374.6377, 374.6377] +25-08-31 03:28:59 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:28:59 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:28:59 | D | - sum error = [ 454.1715, 454.8307, 461.6601, 515.3515] +25-08-31 03:28:59 | D | - best error = [ 374.6377, 374.6377, 374.6377, 374.6377] +25-08-31 03:28:59 | D | + error = 374.6377 +25-08-31 03:28:59 | D | + scale = [min=0.1584, max=8.3329] +25-08-31 03:28:59 | D | - transformer_blocks.2.attn.out_proj + transformer_blocks.2.attn.add_out_proj +25-08-31 03:28:59 | D | + w: sfp4_e2m1_all +25-08-31 03:28:59 | D | + x: sfp4_e2m1_all +25-08-31 03:28:59 | D | + y: None +25-08-31 03:28:59 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:28:59 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 03:29:00 | D | + x - AbsMax +25-08-31 03:29:00 | D | + x = [min=0.2295, max=11.1250] +25-08-31 03:29:00 | D | + w - AbsMax +25-08-31 03:29:00 | D | + w = [min=0.1064, max=0.5977] +25-08-31 03:29:00 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:29:01 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:30:49 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:30:49 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:30:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:30:49 | D | - sum error = [ 3013.1376, 2965.8076, 2916.7376, 2879.3881, 2835.8874] +25-08-31 03:30:49 | D | - best error = [ 3013.1376, 2965.8076, 2916.7376, 2879.3881, 2835.8874] +25-08-31 03:30:49 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:30:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:30:49 | D | - sum error = [ 2810.9009, 2780.6131, 2762.8964, 2751.0742, 2741.8880] +25-08-31 03:30:49 | D | - best error = [ 2810.9009, 2780.6131, 2762.8964, 2751.0742, 2741.8880] +25-08-31 03:30:49 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:30:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:30:49 | D | - sum error = [ 2749.3268, 2741.0771, 2761.7188, 2758.6874, 2756.5543] +25-08-31 03:30:49 | D | - best error = [ 2741.8880, 2741.0771, 2741.0771, 2741.0771, 2741.0771] +25-08-31 03:30:49 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:30:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:30:49 | D | - sum error = [ 2769.2757, 2784.2769, 2797.6997, 2824.6718, 2844.0870] +25-08-31 03:30:49 | D | - best error = [ 2741.0771, 2741.0771, 2741.0771, 2741.0771, 2741.0771] +25-08-31 03:30:49 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:30:49 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:30:49 | D | - sum error = [ 3186.8020, 3101.3084, 3044.9378, 2986.4276, 2928.0564] +25-08-31 03:30:49 | D | - best error = [ 2741.0771, 2741.0771, 2741.0771, 2741.0771, 2741.0771] +25-08-31 03:30:49 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:30:49 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:30:49 | D | - sum error = [ 2874.3796, 2842.5941, 2820.1704, 2805.1607, 2789.3816] +25-08-31 03:30:49 | D | - best error = [ 2741.0771, 2741.0771, 2741.0771, 2741.0771, 2741.0771] +25-08-31 03:30:49 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:30:49 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:30:49 | D | - sum error = [ 2786.1078, 2768.1926, 2764.5401, 2764.0053, 2769.4250] +25-08-31 03:30:49 | D | - best error = [ 2741.0771, 2741.0771, 2741.0771, 2741.0771, 2741.0771] +25-08-31 03:30:49 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:30:49 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:30:49 | D | - sum error = [ 2771.7853, 2815.6287, 2812.3949, 2835.3467] +25-08-31 03:30:49 | D | - best error = [ 2741.0771, 2741.0771, 2741.0771, 2741.0771] +25-08-31 03:30:49 | D | + error = 2741.0771 +25-08-31 03:30:49 | D | + scale = [min=0.4451, max=3.7624] +25-08-31 03:30:49 | D | - transformer_blocks.2.ff.up_proj +25-08-31 03:30:49 | D | + w: sfp4_e2m1_all +25-08-31 03:30:49 | D | + x: sfp4_e2m1_all +25-08-31 03:30:49 | D | + y: None +25-08-31 03:30:49 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:30:49 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 03:30:50 | D | + x - AbsMax +25-08-31 03:30:50 | D | + x = [min=0.0099, max=6.1250] +25-08-31 03:30:50 | D | + w - AbsMax +25-08-31 03:30:50 | D | + w = [min=0.0325, max=0.7617] +25-08-31 03:30:50 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:30:51 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:32:07 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:32:07 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:32:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:32:07 | D | - sum error = [ 1045.1134, 1025.1163, 1005.8473, 994.2120, 980.7403] +25-08-31 03:32:07 | D | - best error = [ 1045.1134, 1025.1163, 1005.8473, 994.2120, 980.7403] +25-08-31 03:32:07 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:32:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:32:07 | D | - sum error = [ 973.3281, 963.9936, 961.5128, 959.0830, 961.0025] +25-08-31 03:32:07 | D | - best error = [ 973.3281, 963.9936, 961.5128, 959.0830, 959.0830] +25-08-31 03:32:07 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:32:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:32:07 | D | - sum error = [ 963.4719, 970.9217, 981.7205, 997.2079, 1020.1709] +25-08-31 03:32:07 | D | - best error = [ 959.0830, 959.0830, 959.0830, 959.0830, 959.0830] +25-08-31 03:32:07 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:32:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:32:07 | D | - sum error = [ 1053.5791, 1097.7479, 1155.2268, 1224.4397, 1300.3929] +25-08-31 03:32:07 | D | - best error = [ 959.0830, 959.0830, 959.0830, 959.0830, 959.0830] +25-08-31 03:32:07 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:32:07 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:32:07 | D | - sum error = [ 1453.9680, 1344.7675, 1261.8612, 1195.5818, 1144.9062] +25-08-31 03:32:07 | D | - best error = [ 959.0830, 959.0830, 959.0830, 959.0830, 959.0830] +25-08-31 03:32:07 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:32:07 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:32:07 | D | - sum error = [ 1099.0608, 1064.9943, 1038.2362, 1020.0262, 1002.3521] +25-08-31 03:32:07 | D | - best error = [ 959.0830, 959.0830, 959.0830, 959.0830, 959.0830] +25-08-31 03:32:07 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:32:07 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:32:07 | D | - sum error = [ 993.3183, 986.8878, 987.1827, 994.2926, 1015.4989] +25-08-31 03:32:07 | D | - best error = [ 959.0830, 959.0830, 959.0830, 959.0830, 959.0830] +25-08-31 03:32:07 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:32:07 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:32:07 | D | - sum error = [ 1046.8902, 1098.6524, 1177.2246, 1272.8951] +25-08-31 03:32:07 | D | - best error = [ 959.0830, 959.0830, 959.0830, 959.0830] +25-08-31 03:32:07 | D | + error = 959.0830 +25-08-31 03:32:07 | D | + scale = [min=0.1582, max=2.0646] +25-08-31 03:32:07 | D | - transformer_blocks.2.ff.down_proj +25-08-31 03:32:07 | D | + w: sfp4_e2m1_all +25-08-31 03:32:07 | D | + x: sfp4_e2m1_all +25-08-31 03:32:07 | D | + y: None +25-08-31 03:32:07 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:32:07 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 03:32:09 | D | + x - AbsMax +25-08-31 03:32:09 | D | + x = [min=0.1533, max=7.9062] +25-08-31 03:32:09 | D | + w - AbsMax +25-08-31 03:32:09 | D | + w = [min=0.0752, max=0.5977] +25-08-31 03:32:09 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:32:11 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:34:05 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:34:05 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:34:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:34:05 | D | - sum error = [ 1841.3848, 1786.5108, 1798.8453, 1787.2522, 1804.0460] +25-08-31 03:34:05 | D | - best error = [ 1841.3848, 1786.5108, 1786.5108, 1786.5108, 1786.5108] +25-08-31 03:34:05 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:34:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:34:05 | D | - sum error = [ 1778.4642, 1783.2365, 1788.0384, 1747.1729, 1771.5478] +25-08-31 03:34:05 | D | - best error = [ 1778.4642, 1778.4642, 1778.4642, 1747.1729, 1747.1729] +25-08-31 03:34:05 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:34:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:34:05 | D | - sum error = [ 1777.8083, 1800.8622, 1779.5582, 1768.9431, 1796.8097] +25-08-31 03:34:05 | D | - best error = [ 1747.1729, 1747.1729, 1747.1729, 1747.1729, 1747.1729] +25-08-31 03:34:05 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:34:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:34:05 | D | - sum error = [ 1836.9738, 1856.2183, 1835.0763, 1833.6339, 1869.7002] +25-08-31 03:34:05 | D | - best error = [ 1747.1729, 1747.1729, 1747.1729, 1747.1729, 1747.1729] +25-08-31 03:34:05 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:34:05 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:34:05 | D | - sum error = [ 2147.9376, 2093.9488, 2053.1423, 1979.1410, 1942.6535] +25-08-31 03:34:05 | D | - best error = [ 1747.1729, 1747.1729, 1747.1729, 1747.1729, 1747.1729] +25-08-31 03:34:05 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:34:05 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:34:05 | D | - sum error = [ 1899.6315, 1857.2149, 1830.7171, 1810.2195, 1817.0160] +25-08-31 03:34:05 | D | - best error = [ 1747.1729, 1747.1729, 1747.1729, 1747.1729, 1747.1729] +25-08-31 03:34:05 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:34:05 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:34:05 | D | - sum error = [ 1806.3366, 1794.2930, 1787.0893, 1800.4002, 1811.4218] +25-08-31 03:34:05 | D | - best error = [ 1747.1729, 1747.1729, 1747.1729, 1747.1729, 1747.1729] +25-08-31 03:34:05 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:34:05 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:34:05 | D | - sum error = [ 1814.1376, 1836.0929, 1864.7675, 1874.7841] +25-08-31 03:34:05 | D | - best error = [ 1747.1729, 1747.1729, 1747.1729, 1747.1729] +25-08-31 03:34:05 | D | + error = 1747.1729 +25-08-31 03:34:05 | D | + scale = [min=0.4723, max=2.2866] +25-08-31 03:34:05 | D | - transformer_blocks.2.ff_context.up_proj +25-08-31 03:34:05 | D | + w: sfp4_e2m1_all +25-08-31 03:34:05 | D | + x: sfp4_e2m1_all +25-08-31 03:34:05 | D | + y: None +25-08-31 03:34:05 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:34:05 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 03:34:05 | D | + x - AbsMax +25-08-31 03:34:05 | D | + x = [min=0.1064, max=69.5000] +25-08-31 03:34:05 | D | + w - AbsMax +25-08-31 03:34:05 | D | + w = [min=0.1021, max=0.4824] +25-08-31 03:34:05 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:34:06 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:34:50 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:34:50 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:34:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:34:50 | D | - sum error = [ 2746.5906, 2666.5069, 2606.9086, 2560.5147, 2499.4019] +25-08-31 03:34:50 | D | - best error = [ 2746.5906, 2666.5069, 2606.9086, 2560.5147, 2499.4019] +25-08-31 03:34:50 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:34:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:34:50 | D | - sum error = [ 2477.0533, 2424.4037, 2407.2640, 2386.6805, 2373.5829] +25-08-31 03:34:50 | D | - best error = [ 2477.0533, 2424.4037, 2407.2640, 2386.6805, 2373.5829] +25-08-31 03:34:50 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:34:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:34:50 | D | - sum error = [ 2369.8101, 2361.0110, 2326.7772, 2327.1390, 2344.5029] +25-08-31 03:34:50 | D | - best error = [ 2369.8101, 2361.0110, 2326.7772, 2326.7772, 2326.7772] +25-08-31 03:34:50 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:34:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:34:50 | D | - sum error = [ 2345.2727, 2368.1615, 2407.1106, 2436.7567, 2460.1619] +25-08-31 03:34:50 | D | - best error = [ 2326.7772, 2326.7772, 2326.7772, 2326.7772, 2326.7772] +25-08-31 03:34:50 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:34:50 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:34:50 | D | - sum error = [ 2768.8117, 2674.9335, 2624.4926, 2565.8773, 2507.2394] +25-08-31 03:34:50 | D | - best error = [ 2326.7772, 2326.7772, 2326.7772, 2326.7772, 2326.7772] +25-08-31 03:34:50 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:34:50 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:34:50 | D | - sum error = [ 2454.9990, 2430.2525, 2398.8965, 2395.2572, 2353.0343] +25-08-31 03:34:50 | D | - best error = [ 2326.7772, 2326.7772, 2326.7772, 2326.7772, 2326.7772] +25-08-31 03:34:50 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:34:50 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:34:50 | D | - sum error = [ 2340.8599, 2340.6598, 2345.9702, 2367.4669, 2361.5360] +25-08-31 03:34:50 | D | - best error = [ 2326.7772, 2326.7772, 2326.7772, 2326.7772, 2326.7772] +25-08-31 03:34:50 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:34:50 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:34:50 | D | - sum error = [ 2392.7786, 2407.4582, 2421.0955, 2454.9552] +25-08-31 03:34:50 | D | - best error = [ 2326.7772, 2326.7772, 2326.7772, 2326.7772] +25-08-31 03:34:50 | D | + error = 2326.7772 +25-08-31 03:34:50 | D | + scale = [min=0.2608, max=12.7406] +25-08-31 03:34:50 | D | - transformer_blocks.2.ff_context.down_proj +25-08-31 03:34:50 | D | + w: sfp4_e2m1_all +25-08-31 03:34:50 | D | + x: sfp4_e2m1_all +25-08-31 03:34:50 | D | + y: None +25-08-31 03:34:50 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:34:50 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 03:34:50 | D | + x - AbsMax +25-08-31 03:34:50 | D | + x = [min=0.0000, max=31.6250] +25-08-31 03:34:50 | D | + w - AbsMax +25-08-31 03:34:50 | D | + w = [min=0.0228, max=0.6094] +25-08-31 03:34:50 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:34:51 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:35:39 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:35:39 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:35:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:35:39 | D | - sum error = [ 607.1625, 586.4895, 577.8273, 571.1905, 564.8856] +25-08-31 03:35:39 | D | - best error = [ 607.1625, 586.4895, 577.8273, 571.1905, 564.8856] +25-08-31 03:35:39 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:35:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:35:39 | D | - sum error = [ 555.8188, 557.1414, 553.7299, 550.0923, 556.1223] +25-08-31 03:35:39 | D | - best error = [ 555.8188, 555.8188, 553.7299, 550.0923, 550.0923] +25-08-31 03:35:39 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:35:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:35:39 | D | - sum error = [ 552.0586, 552.9596, 558.3591, 563.3192, 572.1656] +25-08-31 03:35:39 | D | - best error = [ 550.0923, 550.0923, 550.0923, 550.0923, 550.0923] +25-08-31 03:35:39 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:35:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:35:39 | D | - sum error = [ 579.6220, 589.9860, 605.0640, 616.3263, 636.7527] +25-08-31 03:35:39 | D | - best error = [ 550.0923, 550.0923, 550.0923, 550.0923, 550.0923] +25-08-31 03:35:39 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:35:39 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:35:39 | D | - sum error = [ 685.0514, 668.6551, 657.7739, 646.7637, 635.8732] +25-08-31 03:35:39 | D | - best error = [ 550.0923, 550.0923, 550.0923, 550.0923, 550.0923] +25-08-31 03:35:39 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:35:39 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:35:39 | D | - sum error = [ 630.5924, 625.5606, 622.3314, 616.7918, 610.5790] +25-08-31 03:35:39 | D | - best error = [ 550.0923, 550.0923, 550.0923, 550.0923, 550.0923] +25-08-31 03:35:39 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:35:39 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:35:39 | D | - sum error = [ 613.6594, 615.3958, 609.9168, 614.5409, 619.4323] +25-08-31 03:35:39 | D | - best error = [ 550.0923, 550.0923, 550.0923, 550.0923, 550.0923] +25-08-31 03:35:39 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:35:39 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:35:39 | D | - sum error = [ 624.9346, 627.9067, 636.5848, 646.8630] +25-08-31 03:35:39 | D | - best error = [ 550.0923, 550.0923, 550.0923, 550.0923] +25-08-31 03:35:39 | D | + error = 550.0923 +25-08-31 03:35:39 | D | + scale = [min=0.0019, max=3.9812] +25-08-31 03:35:59 | D | - Smoothing Diffusion Block transformer_blocks.3 +25-08-31 03:35:59 | D | - Skipping Module transformer_blocks.3.norm1.linear +25-08-31 03:35:59 | D | - Skipping Module transformer_blocks.3.norm1_context.linear +25-08-31 03:35:59 | D | - Smoothing Transformer Block transformer_blocks.3 +25-08-31 03:35:59 | D | - transformer_blocks.3.attn.qkv_proj +25-08-31 03:35:59 | D | + w: sfp4_e2m1_all +25-08-31 03:35:59 | D | + x: sfp4_e2m1_all +25-08-31 03:35:59 | D | + y: None +25-08-31 03:35:59 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:35:59 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 03:35:59 | D | + x - AbsMax +25-08-31 03:35:59 | D | + x = [min=0.0233, max=12.5625] +25-08-31 03:35:59 | D | + w - AbsMax +25-08-31 03:35:59 | D | + w = [min=0.0923, max=1.1641] +25-08-31 03:35:59 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:36:00 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:37:13 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:37:13 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:37:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:37:13 | D | - sum error = [ 1621.5856, 1527.2417, 1441.9728, 1396.7592, 1388.3540] +25-08-31 03:37:13 | D | - best error = [ 1621.5856, 1527.2417, 1441.9728, 1396.7592, 1388.3540] +25-08-31 03:37:13 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:37:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:37:13 | D | - sum error = [ 1372.4239, 1353.8333, 1366.4652, 1368.5475, 1389.3494] +25-08-31 03:37:13 | D | - best error = [ 1372.4239, 1353.8333, 1353.8333, 1353.8333, 1353.8333] +25-08-31 03:37:13 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:37:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:37:13 | D | - sum error = [ 1411.9016, 1484.5307, 1592.4015, 1688.3424, 1827.7742] +25-08-31 03:37:13 | D | - best error = [ 1353.8333, 1353.8333, 1353.8333, 1353.8333, 1353.8333] +25-08-31 03:37:13 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:37:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:37:13 | D | - sum error = [ 2110.6083, 2355.2408, 2570.3898, 2787.8235, 3051.3395] +25-08-31 03:37:13 | D | - best error = [ 1353.8333, 1353.8333, 1353.8333, 1353.8333, 1353.8333] +25-08-31 03:37:13 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:37:13 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:37:13 | D | - sum error = [ 3305.6319, 3046.4231, 2722.5113, 2433.8291, 2253.5925] +25-08-31 03:37:13 | D | - best error = [ 1353.8333, 1353.8333, 1353.8333, 1353.8333, 1353.8333] +25-08-31 03:37:13 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:37:13 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:37:13 | D | - sum error = [ 2245.7553, 2100.6549, 2064.7603, 1892.9426, 1853.6644] +25-08-31 03:37:13 | D | - best error = [ 1353.8333, 1353.8333, 1353.8333, 1353.8333, 1353.8333] +25-08-31 03:37:13 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:37:13 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:37:13 | D | - sum error = [ 1847.8016, 1921.5528, 1978.6807, 2053.2613, 2222.0664] +25-08-31 03:37:13 | D | - best error = [ 1353.8333, 1353.8333, 1353.8333, 1353.8333, 1353.8333] +25-08-31 03:37:13 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:37:13 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:37:13 | D | - sum error = [ 2513.2284, 2661.5384, 2894.7811, 3092.1315] +25-08-31 03:37:13 | D | - best error = [ 1353.8333, 1353.8333, 1353.8333, 1353.8333] +25-08-31 03:37:13 | D | + error = 1353.8333 +25-08-31 03:37:13 | D | + scale = [min=0.3238, max=2.1366] +25-08-31 03:37:13 | D | - transformer_blocks.3.attn add_qkv_proj +25-08-31 03:37:13 | D | + w: sfp4_e2m1_all +25-08-31 03:37:13 | D | + x: sfp4_e2m1_all +25-08-31 03:37:13 | D | + y: None +25-08-31 03:37:13 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:37:13 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 03:37:13 | D | + x - AbsMax +25-08-31 03:37:13 | D | + x = [min=0.0820, max=23.6250] +25-08-31 03:37:13 | D | + w - AbsMax +25-08-31 03:37:13 | D | + w = [min=0.1060, max=0.4746] +25-08-31 03:37:13 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 03:37:14 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 03:38:17 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:38:17 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:38:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:38:17 | D | - sum error = [ 741.7560, 665.2999, 659.1876, 633.1523, 610.7416] +25-08-31 03:38:17 | D | - best error = [ 741.7560, 665.2999, 659.1876, 633.1523, 610.7416] +25-08-31 03:38:17 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:38:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:38:17 | D | - sum error = [ 614.1686, 586.0752, 575.5956, 567.7518, 553.4092] +25-08-31 03:38:17 | D | - best error = [ 610.7416, 586.0752, 575.5956, 567.7518, 553.4092] +25-08-31 03:38:17 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:38:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:38:17 | D | - sum error = [ 564.1676, 576.0169, 575.2761, 579.1137, 619.4243] +25-08-31 03:38:17 | D | - best error = [ 553.4092, 553.4092, 553.4092, 553.4092, 553.4092] +25-08-31 03:38:17 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:38:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:38:17 | D | - sum error = [ 644.1486, 625.8643, 676.1878, 732.0418, 745.8359] +25-08-31 03:38:17 | D | - best error = [ 553.4092, 553.4092, 553.4092, 553.4092, 553.4092] +25-08-31 03:38:17 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:38:17 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:38:17 | D | - sum error = [ 922.4497, 895.6543, 803.8271, 757.2526, 728.3515] +25-08-31 03:38:17 | D | - best error = [ 553.4092, 553.4092, 553.4092, 553.4092, 553.4092] +25-08-31 03:38:17 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:38:17 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:38:17 | D | - sum error = [ 680.9529, 658.6833, 642.2792, 630.6342, 613.4343] +25-08-31 03:38:17 | D | - best error = [ 553.4092, 553.4092, 553.4092, 553.4092, 553.4092] +25-08-31 03:38:17 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:38:17 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:38:17 | D | - sum error = [ 609.5911, 616.7694, 597.5782, 654.6396, 627.4828] +25-08-31 03:38:17 | D | - best error = [ 553.4092, 553.4092, 553.4092, 553.4092, 553.4092] +25-08-31 03:38:17 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:38:17 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:38:17 | D | - sum error = [ 640.3805, 689.7170, 718.8980, 750.5363] +25-08-31 03:38:17 | D | - best error = [ 553.4092, 553.4092, 553.4092, 553.4092] +25-08-31 03:38:17 | D | + error = 553.4092 +25-08-31 03:38:17 | D | + scale = [min=0.3246, max=4.1497] +25-08-31 03:38:18 | D | - transformer_blocks.3.attn.out_proj + transformer_blocks.3.attn.add_out_proj +25-08-31 03:38:18 | D | + w: sfp4_e2m1_all +25-08-31 03:38:18 | D | + x: sfp4_e2m1_all +25-08-31 03:38:18 | D | + y: None +25-08-31 03:38:18 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:38:18 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 03:38:18 | D | + x - AbsMax +25-08-31 03:38:18 | D | + x = [min=0.3574, max=11.3750] +25-08-31 03:38:18 | D | + w - AbsMax +25-08-31 03:38:18 | D | + w = [min=0.1064, max=0.3906] +25-08-31 03:38:18 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:38:19 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:40:06 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:40:06 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:40:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:40:06 | D | - sum error = [ 3067.1259, 3054.6453, 3023.6273, 3000.4336, 2980.2134] +25-08-31 03:40:06 | D | - best error = [ 3067.1259, 3054.6453, 3023.6273, 3000.4336, 2980.2134] +25-08-31 03:40:06 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:40:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:40:06 | D | - sum error = [ 2976.8243, 2967.7655, 2959.4028, 2953.6401, 2955.0635] +25-08-31 03:40:06 | D | - best error = [ 2976.8243, 2967.7655, 2959.4028, 2953.6401, 2953.6401] +25-08-31 03:40:06 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:40:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:40:06 | D | - sum error = [ 2958.8369, 2972.7899, 2972.0515, 2986.3407, 2997.5333] +25-08-31 03:40:06 | D | - best error = [ 2953.6401, 2953.6401, 2953.6401, 2953.6401, 2953.6401] +25-08-31 03:40:06 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:40:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:40:06 | D | - sum error = [ 3015.3708, 3029.5628, 3037.8701, 3056.0370, 3097.4000] +25-08-31 03:40:06 | D | - best error = [ 2953.6401, 2953.6401, 2953.6401, 2953.6401, 2953.6401] +25-08-31 03:40:06 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:40:06 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:40:06 | D | - sum error = [ 3242.4725, 3195.1631, 3155.7684, 3112.9854, 3078.2285] +25-08-31 03:40:06 | D | - best error = [ 2953.6401, 2953.6401, 2953.6401, 2953.6401, 2953.6401] +25-08-31 03:40:06 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:40:06 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:40:06 | D | - sum error = [ 3053.8558, 3024.7773, 3010.7668, 2983.9600, 2986.0889] +25-08-31 03:40:06 | D | - best error = [ 2953.6401, 2953.6401, 2953.6401, 2953.6401, 2953.6401] +25-08-31 03:40:06 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:40:06 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:40:06 | D | - sum error = [ 2987.7664, 2985.8701, 2998.2955, 3006.0514, 3011.4487] +25-08-31 03:40:06 | D | - best error = [ 2953.6401, 2953.6401, 2953.6401, 2953.6401, 2953.6401] +25-08-31 03:40:06 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:40:06 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:40:06 | D | - sum error = [ 3023.8522, 3035.2394, 3054.1902, 3077.2294] +25-08-31 03:40:06 | D | - best error = [ 2953.6401, 2953.6401, 2953.6401, 2953.6401] +25-08-31 03:40:06 | D | + error = 2953.6401 +25-08-31 03:40:06 | D | + scale = [min=0.6626, max=2.6447] +25-08-31 03:40:06 | D | - transformer_blocks.3.ff.up_proj +25-08-31 03:40:06 | D | + w: sfp4_e2m1_all +25-08-31 03:40:06 | D | + x: sfp4_e2m1_all +25-08-31 03:40:06 | D | + y: None +25-08-31 03:40:06 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:40:06 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 03:40:07 | D | + x - AbsMax +25-08-31 03:40:07 | D | + x = [min=0.0282, max=12.5625] +25-08-31 03:40:07 | D | + w - AbsMax +25-08-31 03:40:07 | D | + w = [min=0.0552, max=0.5781] +25-08-31 03:40:07 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:40:08 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:41:22 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:41:22 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:41:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:41:22 | D | - sum error = [ 2291.4753, 2214.0417, 2150.2452, 2094.8184, 2057.8905] +25-08-31 03:41:22 | D | - best error = [ 2291.4753, 2214.0417, 2150.2452, 2094.8184, 2057.8905] +25-08-31 03:41:22 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:41:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:41:22 | D | - sum error = [ 2024.0627, 1999.5815, 1978.1345, 1977.3814, 1968.1059] +25-08-31 03:41:22 | D | - best error = [ 2024.0627, 1999.5815, 1978.1345, 1977.3814, 1968.1059] +25-08-31 03:41:22 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:41:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:41:22 | D | - sum error = [ 1972.2322, 1989.0941, 2021.3706, 2069.0430, 2129.6744] +25-08-31 03:41:22 | D | - best error = [ 1968.1059, 1968.1059, 1968.1059, 1968.1059, 1968.1059] +25-08-31 03:41:22 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:41:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:41:22 | D | - sum error = [ 2221.4211, 2321.0029, 2448.8247, 2595.8577, 2768.2563] +25-08-31 03:41:22 | D | - best error = [ 1968.1059, 1968.1059, 1968.1059, 1968.1059, 1968.1059] +25-08-31 03:41:22 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:41:22 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:41:22 | D | - sum error = [ 3088.5608, 2850.3021, 2654.2396, 2499.7042, 2365.6478] +25-08-31 03:41:22 | D | - best error = [ 1968.1059, 1968.1059, 1968.1059, 1968.1059, 1968.1059] +25-08-31 03:41:22 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:41:22 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:41:22 | D | - sum error = [ 2268.1731, 2181.5384, 2126.6310, 2081.9869, 2049.3984] +25-08-31 03:41:22 | D | - best error = [ 1968.1059, 1968.1059, 1968.1059, 1968.1059, 1968.1059] +25-08-31 03:41:22 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:41:22 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:41:22 | D | - sum error = [ 2028.2550, 2020.9734, 2032.5670, 2062.2793, 2124.0401] +25-08-31 03:41:22 | D | - best error = [ 1968.1059, 1968.1059, 1968.1059, 1968.1059, 1968.1059] +25-08-31 03:41:22 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:41:22 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:41:22 | D | - sum error = [ 2216.6922, 2342.9888, 2516.4506, 2718.8183] +25-08-31 03:41:22 | D | - best error = [ 1968.1059, 1968.1059, 1968.1059, 1968.1059] +25-08-31 03:41:22 | D | + error = 1968.1059 +25-08-31 03:41:22 | D | + scale = [min=0.2007, max=3.1231] +25-08-31 03:41:23 | D | - transformer_blocks.3.ff.down_proj +25-08-31 03:41:23 | D | + w: sfp4_e2m1_all +25-08-31 03:41:23 | D | + x: sfp4_e2m1_all +25-08-31 03:41:23 | D | + y: None +25-08-31 03:41:23 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:41:23 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 03:41:24 | D | + x - AbsMax +25-08-31 03:41:24 | D | + x = [min=0.2109, max=17.3750] +25-08-31 03:41:24 | D | + w - AbsMax +25-08-31 03:41:24 | D | + w = [min=0.0679, max=0.7656] +25-08-31 03:41:24 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:41:26 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:43:11 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:43:11 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:43:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:43:11 | D | - sum error = [ 2307.2533, 2236.7431, 2171.2471, 2121.5354, 2081.5159] +25-08-31 03:43:11 | D | - best error = [ 2307.2533, 2236.7431, 2171.2471, 2121.5354, 2081.5159] +25-08-31 03:43:11 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:43:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:43:11 | D | - sum error = [ 2054.3454, 2040.5428, 2040.9390, 2054.2837, 2072.6910] +25-08-31 03:43:11 | D | - best error = [ 2054.3454, 2040.5428, 2040.5428, 2040.5428, 2040.5428] +25-08-31 03:43:11 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:43:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:43:11 | D | - sum error = [ 2098.2119, 2151.7753, 2223.6222, 2321.0004, 2453.6885] +25-08-31 03:43:11 | D | - best error = [ 2040.5428, 2040.5428, 2040.5428, 2040.5428, 2040.5428] +25-08-31 03:43:11 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:43:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:43:11 | D | - sum error = [ 2608.9879, 2819.4953, 3064.0085, 3362.6927, 3723.4757] +25-08-31 03:43:11 | D | - best error = [ 2040.5428, 2040.5428, 2040.5428, 2040.5428, 2040.5428] +25-08-31 03:43:11 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:43:11 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:43:11 | D | - sum error = [ 3087.5432, 2906.8526, 2731.6771, 2580.1205, 2445.0368] +25-08-31 03:43:11 | D | - best error = [ 2040.5428, 2040.5428, 2040.5428, 2040.5428, 2040.5428] +25-08-31 03:43:11 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:43:11 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:43:11 | D | - sum error = [ 2335.2807, 2229.5881, 2153.3850, 2099.4071, 2077.5134] +25-08-31 03:43:11 | D | - best error = [ 2040.5428, 2040.5428, 2040.5428, 2040.5428, 2040.5428] +25-08-31 03:43:11 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:43:11 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:43:11 | D | - sum error = [ 2071.2238, 2103.6786, 2154.6803, 2256.2965, 2384.8032] +25-08-31 03:43:11 | D | - best error = [ 2040.5428, 2040.5428, 2040.5428, 2040.5428, 2040.5428] +25-08-31 03:43:11 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:43:11 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:43:11 | D | - sum error = [ 2553.7099, 2810.9771, 3162.0503, 3588.7949] +25-08-31 03:43:11 | D | - best error = [ 2040.5428, 2040.5428, 2040.5428, 2040.5428] +25-08-31 03:43:11 | D | + error = 2040.5428 +25-08-31 03:43:11 | D | + scale = [min=0.6270, max=2.3549] +25-08-31 03:43:11 | D | - transformer_blocks.3.ff_context.up_proj +25-08-31 03:43:11 | D | + w: sfp4_e2m1_all +25-08-31 03:43:11 | D | + x: sfp4_e2m1_all +25-08-31 03:43:11 | D | + y: None +25-08-31 03:43:11 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:43:11 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 03:43:11 | D | + x - AbsMax +25-08-31 03:43:11 | D | + x = [min=0.1025, max=13.4375] +25-08-31 03:43:11 | D | + w - AbsMax +25-08-31 03:43:11 | D | + w = [min=0.0957, max=0.5234] +25-08-31 03:43:11 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:43:12 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:43:55 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:43:55 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:43:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:43:55 | D | - sum error = [ 2895.6669, 2821.8802, 2793.3096, 2746.5441, 2693.3185] +25-08-31 03:43:55 | D | - best error = [ 2895.6669, 2821.8802, 2793.3096, 2746.5441, 2693.3185] +25-08-31 03:43:55 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:43:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:43:55 | D | - sum error = [ 2670.7366, 2638.6181, 2624.8338, 2607.1562, 2606.8463] +25-08-31 03:43:55 | D | - best error = [ 2670.7366, 2638.6181, 2624.8338, 2607.1562, 2606.8463] +25-08-31 03:43:55 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:43:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:43:55 | D | - sum error = [ 2598.0733, 2580.6102, 2601.9404, 2614.3923, 2589.9162] +25-08-31 03:43:55 | D | - best error = [ 2598.0733, 2580.6102, 2580.6102, 2580.6102, 2580.6102] +25-08-31 03:43:55 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:43:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:43:55 | D | - sum error = [ 2603.1148, 2635.4750, 2670.4822, 2692.0221, 2741.1970] +25-08-31 03:43:55 | D | - best error = [ 2580.6102, 2580.6102, 2580.6102, 2580.6102, 2580.6102] +25-08-31 03:43:55 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:43:55 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:43:55 | D | - sum error = [ 2897.3581, 2847.8710, 2773.6210, 2735.1101, 2687.0546] +25-08-31 03:43:55 | D | - best error = [ 2580.6102, 2580.6102, 2580.6102, 2580.6102, 2580.6102] +25-08-31 03:43:55 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:43:55 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:43:55 | D | - sum error = [ 2669.2001, 2662.0330, 2624.6779, 2612.1821, 2602.8257] +25-08-31 03:43:55 | D | - best error = [ 2580.6102, 2580.6102, 2580.6102, 2580.6102, 2580.6102] +25-08-31 03:43:55 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:43:55 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:43:55 | D | - sum error = [ 2599.7656, 2579.0070, 2593.6813, 2603.9955, 2602.4562] +25-08-31 03:43:55 | D | - best error = [ 2580.6102, 2579.0070, 2579.0070, 2579.0070, 2579.0070] +25-08-31 03:43:55 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:43:55 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:43:55 | D | - sum error = [ 2622.7976, 2672.9448, 2697.1975, 2730.5154] +25-08-31 03:43:55 | D | - best error = [ 2579.0070, 2579.0070, 2579.0070, 2579.0070] +25-08-31 03:43:55 | D | + error = 2579.0070 +25-08-31 03:43:55 | D | + scale = [min=0.5318, max=8.8579] +25-08-31 03:43:55 | D | - transformer_blocks.3.ff_context.down_proj +25-08-31 03:43:55 | D | + w: sfp4_e2m1_all +25-08-31 03:43:55 | D | + x: sfp4_e2m1_all +25-08-31 03:43:55 | D | + y: None +25-08-31 03:43:55 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:43:55 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 03:43:56 | D | + x - AbsMax +25-08-31 03:43:56 | D | + x = [min=0.0000, max=35.2500] +25-08-31 03:43:56 | D | + w - AbsMax +25-08-31 03:43:56 | D | + w = [min=0.0232, max=0.5352] +25-08-31 03:43:56 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:43:56 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:44:44 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:44:44 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:44:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:44:44 | D | - sum error = [ 501.3159, 487.8766, 478.3680, 470.5820, 464.7409] +25-08-31 03:44:44 | D | - best error = [ 501.3159, 487.8766, 478.3680, 470.5820, 464.7409] +25-08-31 03:44:44 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:44:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:44:44 | D | - sum error = [ 462.4017, 456.5424, 456.9009, 451.8679, 451.7969] +25-08-31 03:44:44 | D | - best error = [ 462.4017, 456.5424, 456.5424, 451.8679, 451.7969] +25-08-31 03:44:44 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:44:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:44:44 | D | - sum error = [ 452.5818, 455.3171, 458.0218, 462.8594, 469.0578] +25-08-31 03:44:44 | D | - best error = [ 451.7969, 451.7969, 451.7969, 451.7969, 451.7969] +25-08-31 03:44:44 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:44:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:44:44 | D | - sum error = [ 473.6191, 481.0513, 493.0351, 504.6500, 519.9200] +25-08-31 03:44:44 | D | - best error = [ 451.7969, 451.7969, 451.7969, 451.7969, 451.7969] +25-08-31 03:44:44 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:44:44 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:44:44 | D | - sum error = [ 582.8250, 572.5430, 560.6468, 551.1214, 542.5834] +25-08-31 03:44:44 | D | - best error = [ 451.7969, 451.7969, 451.7969, 451.7969, 451.7969] +25-08-31 03:44:44 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:44:44 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:44:44 | D | - sum error = [ 534.4590, 529.7585, 523.9569, 521.5178, 517.6546] +25-08-31 03:44:44 | D | - best error = [ 451.7969, 451.7969, 451.7969, 451.7969, 451.7969] +25-08-31 03:44:44 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:44:44 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:44:44 | D | - sum error = [ 516.8115, 514.0135, 515.3916, 511.8993, 514.0783] +25-08-31 03:44:44 | D | - best error = [ 451.7969, 451.7969, 451.7969, 451.7969, 451.7969] +25-08-31 03:44:44 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:44:44 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:44:44 | D | - sum error = [ 514.6872, 535.2053, 521.7785, 528.3331] +25-08-31 03:44:44 | D | - best error = [ 451.7969, 451.7969, 451.7969, 451.7969] +25-08-31 03:44:44 | D | + error = 451.7969 +25-08-31 03:44:44 | D | + scale = [min=0.0008, max=4.9685] +25-08-31 03:45:03 | D | - Smoothing Diffusion Block transformer_blocks.4 +25-08-31 03:45:03 | D | - Skipping Module transformer_blocks.4.norm1.linear +25-08-31 03:45:03 | D | - Skipping Module transformer_blocks.4.norm1_context.linear +25-08-31 03:45:03 | D | - Smoothing Transformer Block transformer_blocks.4 +25-08-31 03:45:03 | D | - transformer_blocks.4.attn.qkv_proj +25-08-31 03:45:03 | D | + w: sfp4_e2m1_all +25-08-31 03:45:03 | D | + x: sfp4_e2m1_all +25-08-31 03:45:03 | D | + y: None +25-08-31 03:45:03 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:45:03 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 03:45:03 | D | + x - AbsMax +25-08-31 03:45:03 | D | + x = [min=0.0214, max=23.7500] +25-08-31 03:45:03 | D | + w - AbsMax +25-08-31 03:45:03 | D | + w = [min=0.0996, max=1.1797] +25-08-31 03:45:03 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:45:04 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:46:17 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:46:17 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:46:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:46:17 | D | - sum error = [ 2366.7604, 2241.0976, 2167.6144, 2121.8768, 2111.7752] +25-08-31 03:46:17 | D | - best error = [ 2366.7604, 2241.0976, 2167.6144, 2121.8768, 2111.7752] +25-08-31 03:46:17 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:46:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:46:17 | D | - sum error = [ 2063.2125, 2038.9202, 2045.4662, 2046.9498, 2083.1075] +25-08-31 03:46:17 | D | - best error = [ 2063.2125, 2038.9202, 2038.9202, 2038.9202, 2038.9202] +25-08-31 03:46:17 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:46:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:46:17 | D | - sum error = [ 2086.0917, 2167.5200, 2270.1461, 2382.6237, 2583.8497] +25-08-31 03:46:17 | D | - best error = [ 2038.9202, 2038.9202, 2038.9202, 2038.9202, 2038.9202] +25-08-31 03:46:17 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:46:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:46:17 | D | - sum error = [ 2748.0461, 3015.7493, 3219.1056, 3394.8987, 3716.2874] +25-08-31 03:46:17 | D | - best error = [ 2038.9202, 2038.9202, 2038.9202, 2038.9202, 2038.9202] +25-08-31 03:46:17 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:46:17 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:46:17 | D | - sum error = [ 4533.6136, 4185.2302, 3949.7808, 3840.5457, 3426.4152] +25-08-31 03:46:17 | D | - best error = [ 2038.9202, 2038.9202, 2038.9202, 2038.9202, 2038.9202] +25-08-31 03:46:17 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:46:17 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:46:17 | D | - sum error = [ 3251.3545, 3251.4334, 2941.7463, 2855.7130, 2840.9498] +25-08-31 03:46:17 | D | - best error = [ 2038.9202, 2038.9202, 2038.9202, 2038.9202, 2038.9202] +25-08-31 03:46:17 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:46:17 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:46:17 | D | - sum error = [ 2729.7251, 2796.1526, 2851.8083, 2968.3036, 2998.1883] +25-08-31 03:46:17 | D | - best error = [ 2038.9202, 2038.9202, 2038.9202, 2038.9202, 2038.9202] +25-08-31 03:46:17 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:46:17 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:46:17 | D | - sum error = [ 3177.3959, 3370.4067, 3535.3138, 3856.8416] +25-08-31 03:46:17 | D | - best error = [ 2038.9202, 2038.9202, 2038.9202, 2038.9202] +25-08-31 03:46:17 | D | + error = 2038.9202 +25-08-31 03:46:17 | D | + scale = [min=0.3154, max=2.5864] +25-08-31 03:46:18 | D | - transformer_blocks.4.attn add_qkv_proj +25-08-31 03:46:18 | D | + w: sfp4_e2m1_all +25-08-31 03:46:18 | D | + x: sfp4_e2m1_all +25-08-31 03:46:18 | D | + y: None +25-08-31 03:46:18 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:46:18 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 03:46:18 | D | + x - AbsMax +25-08-31 03:46:18 | D | + x = [min=0.0776, max=19.0000] +25-08-31 03:46:18 | D | + w - AbsMax +25-08-31 03:46:18 | D | + w = [min=0.1069, max=0.4785] +25-08-31 03:46:18 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:46:19 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:47:21 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:47:21 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:47:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:47:21 | D | - sum error = [ 722.1404, 709.3709, 669.2381, 666.1187, 642.4933] +25-08-31 03:47:21 | D | - best error = [ 722.1404, 709.3709, 669.2381, 666.1187, 642.4933] +25-08-31 03:47:21 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:47:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:47:21 | D | - sum error = [ 631.1271, 631.8438, 593.4709, 616.2633, 612.3001] +25-08-31 03:47:21 | D | - best error = [ 631.1271, 631.1271, 593.4709, 593.4709, 593.4709] +25-08-31 03:47:21 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:47:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:47:21 | D | - sum error = [ 609.1253, 600.1088, 606.9079, 624.7343, 626.4387] +25-08-31 03:47:21 | D | - best error = [ 593.4709, 593.4709, 593.4709, 593.4709, 593.4709] +25-08-31 03:47:21 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:47:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:47:21 | D | - sum error = [ 653.4228, 673.2780, 717.7131, 729.8384, 793.4484] +25-08-31 03:47:21 | D | - best error = [ 593.4709, 593.4709, 593.4709, 593.4709, 593.4709] +25-08-31 03:47:21 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:47:21 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:47:21 | D | - sum error = [ 930.6205, 921.1282, 847.9504, 800.2939, 772.9961] +25-08-31 03:47:21 | D | - best error = [ 593.4709, 593.4709, 593.4709, 593.4709, 593.4709] +25-08-31 03:47:21 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:47:21 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:47:21 | D | - sum error = [ 734.8788, 710.9275, 712.1798, 665.8504, 668.3132] +25-08-31 03:47:21 | D | - best error = [ 593.4709, 593.4709, 593.4709, 593.4709, 593.4709] +25-08-31 03:47:21 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:47:21 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:47:21 | D | - sum error = [ 652.7849, 638.2042, 635.2959, 650.8790, 662.5321] +25-08-31 03:47:21 | D | - best error = [ 593.4709, 593.4709, 593.4709, 593.4709, 593.4709] +25-08-31 03:47:21 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:47:21 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:47:21 | D | - sum error = [ 684.0755, 695.7951, 747.8499, 797.1162] +25-08-31 03:47:21 | D | - best error = [ 593.4709, 593.4709, 593.4709, 593.4709] +25-08-31 03:47:21 | D | + error = 593.4709 +25-08-31 03:47:21 | D | + scale = [min=0.4088, max=2.8026] +25-08-31 03:47:22 | D | - transformer_blocks.4.attn.out_proj + transformer_blocks.4.attn.add_out_proj +25-08-31 03:47:22 | D | + w: sfp4_e2m1_all +25-08-31 03:47:22 | D | + x: sfp4_e2m1_all +25-08-31 03:47:22 | D | + y: None +25-08-31 03:47:22 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:47:22 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 03:47:22 | D | + x - AbsMax +25-08-31 03:47:22 | D | + x = [min=0.5508, max=12.1875] +25-08-31 03:47:22 | D | + w - AbsMax +25-08-31 03:47:22 | D | + w = [min=0.1191, max=0.3906] +25-08-31 03:47:22 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:47:23 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:49:10 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:49:10 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:49:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:49:10 | D | - sum error = [ 3694.7741, 3675.3632, 3658.2699, 3637.6355, 3633.1730] +25-08-31 03:49:10 | D | - best error = [ 3694.7741, 3675.3632, 3658.2699, 3637.6355, 3633.1730] +25-08-31 03:49:10 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:49:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:49:10 | D | - sum error = [ 3621.0940, 3608.0231, 3583.5873, 3608.8735, 3583.0308] +25-08-31 03:49:10 | D | - best error = [ 3621.0940, 3608.0231, 3583.5873, 3583.5873, 3583.0308] +25-08-31 03:49:10 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:49:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:49:10 | D | - sum error = [ 3588.6160, 3597.8574, 3594.6072, 3601.9757, 3610.8870] +25-08-31 03:49:10 | D | - best error = [ 3583.0308, 3583.0308, 3583.0308, 3583.0308, 3583.0308] +25-08-31 03:49:10 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:49:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:49:10 | D | - sum error = [ 3612.5887, 3625.5573, 3636.5899, 3660.1217, 3672.7973] +25-08-31 03:49:10 | D | - best error = [ 3583.0308, 3583.0308, 3583.0308, 3583.0308, 3583.0308] +25-08-31 03:49:10 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:49:10 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:49:10 | D | - sum error = [ 3867.6530, 3820.4648, 3785.5801, 3748.5803, 3722.1838] +25-08-31 03:49:10 | D | - best error = [ 3583.0308, 3583.0308, 3583.0308, 3583.0308, 3583.0308] +25-08-31 03:49:10 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:49:10 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:49:10 | D | - sum error = [ 3696.2528, 3684.0350, 3663.9749, 3644.4363, 3634.3295] +25-08-31 03:49:10 | D | - best error = [ 3583.0308, 3583.0308, 3583.0308, 3583.0308, 3583.0308] +25-08-31 03:49:10 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:49:10 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:49:10 | D | - sum error = [ 3630.2705, 3622.6642, 3620.0312, 3622.8278, 3621.1453] +25-08-31 03:49:10 | D | - best error = [ 3583.0308, 3583.0308, 3583.0308, 3583.0308, 3583.0308] +25-08-31 03:49:10 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:49:10 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:49:10 | D | - sum error = [ 3625.3120, 3643.3061, 3657.3147, 3670.6179] +25-08-31 03:49:10 | D | - best error = [ 3583.0308, 3583.0308, 3583.0308, 3583.0308] +25-08-31 03:49:10 | D | + error = 3583.0308 +25-08-31 03:49:10 | D | + scale = [min=0.7646, max=3.0808] +25-08-31 03:49:11 | D | - transformer_blocks.4.ff.up_proj +25-08-31 03:49:11 | D | + w: sfp4_e2m1_all +25-08-31 03:49:11 | D | + x: sfp4_e2m1_all +25-08-31 03:49:11 | D | + y: None +25-08-31 03:49:11 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:49:11 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 03:49:11 | D | + x - AbsMax +25-08-31 03:49:11 | D | + x = [min=0.0525, max=23.7500] +25-08-31 03:49:11 | D | + w - AbsMax +25-08-31 03:49:11 | D | + w = [min=0.0977, max=0.5586] +25-08-31 03:49:11 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:49:12 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:50:29 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:50:29 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:50:29 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:50:29 | D | - sum error = [ 6676.2064, 6505.8242, 6349.0154, 6212.5705, 6098.8638] +25-08-31 03:50:29 | D | - best error = [ 6676.2064, 6505.8242, 6349.0154, 6212.5705, 6098.8638] +25-08-31 03:50:29 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:50:29 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:50:29 | D | - sum error = [ 6016.5734, 5940.2949, 5890.2682, 5843.9736, 5817.9814] +25-08-31 03:50:29 | D | - best error = [ 6016.5734, 5940.2949, 5890.2682, 5843.9736, 5817.9814] +25-08-31 03:50:29 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:50:29 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:50:29 | D | - sum error = [ 5802.0283, 5802.8424, 5818.1719, 5846.3879, 5904.0461] +25-08-31 03:50:29 | D | - best error = [ 5802.0283, 5802.0283, 5802.0283, 5802.0283, 5802.0283] +25-08-31 03:50:29 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:50:29 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:50:29 | D | - sum error = [ 5991.1528, 6101.4769, 6244.3914, 6401.5279, 6598.2758] +25-08-31 03:50:29 | D | - best error = [ 5802.0283, 5802.0283, 5802.0283, 5802.0283, 5802.0283] +25-08-31 03:50:29 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:50:29 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:50:29 | D | - sum error = [ 7308.4305, 6989.5021, 6739.1183, 6513.9810, 6337.1950] +25-08-31 03:50:29 | D | - best error = [ 5802.0283, 5802.0283, 5802.0283, 5802.0283, 5802.0283] +25-08-31 03:50:29 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:50:29 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:50:29 | D | - sum error = [ 6203.8417, 6075.4164, 5998.8886, 5935.8801, 5888.6328] +25-08-31 03:50:29 | D | - best error = [ 5802.0283, 5802.0283, 5802.0283, 5802.0283, 5802.0283] +25-08-31 03:50:29 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:50:29 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:50:29 | D | - sum error = [ 5854.7014, 5855.5461, 5859.5652, 5895.0176, 5953.7020] +25-08-31 03:50:29 | D | - best error = [ 5802.0283, 5802.0283, 5802.0283, 5802.0283, 5802.0283] +25-08-31 03:50:29 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:50:29 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:50:29 | D | - sum error = [ 6045.2166, 6185.0853, 6349.2774, 6580.9218] +25-08-31 03:50:29 | D | - best error = [ 5802.0283, 5802.0283, 5802.0283, 5802.0283] +25-08-31 03:50:29 | D | + error = 5802.0283 +25-08-31 03:50:29 | D | + scale = [min=0.2291, max=4.8734] +25-08-31 03:50:29 | D | - transformer_blocks.4.ff.down_proj +25-08-31 03:50:29 | D | + w: sfp4_e2m1_all +25-08-31 03:50:29 | D | + x: sfp4_e2m1_all +25-08-31 03:50:29 | D | + y: None +25-08-31 03:50:29 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:50:29 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 03:50:31 | D | + x - AbsMax +25-08-31 03:50:31 | D | + x = [min=0.0000, max=23.0000] +25-08-31 03:50:31 | D | + w - AbsMax +25-08-31 03:50:31 | D | + w = [min=0.0967, max=1.0938] +25-08-31 03:50:31 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:50:33 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:52:30 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:52:30 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:52:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:52:30 | D | - sum error = [ 5943.5592, 5927.3772, 5904.3360, 5888.6144, 5888.5679] +25-08-31 03:52:30 | D | - best error = [ 5943.5592, 5927.3772, 5904.3360, 5888.6144, 5888.5679] +25-08-31 03:52:30 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:52:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:52:30 | D | - sum error = [ 5884.3491, 5877.8683, 5882.8357, 5882.1414, 5889.3314] +25-08-31 03:52:30 | D | - best error = [ 5884.3491, 5877.8683, 5877.8683, 5877.8683, 5877.8683] +25-08-31 03:52:30 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:52:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:52:30 | D | - sum error = [ 5891.0845, 5928.6224, 5928.2446, 5941.5614, 5969.4687] +25-08-31 03:52:30 | D | - best error = [ 5877.8683, 5877.8683, 5877.8683, 5877.8683, 5877.8683] +25-08-31 03:52:30 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:52:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:52:30 | D | - sum error = [ 6003.3071, 6034.2479, 6066.7220, 6108.0255, 6152.1167] +25-08-31 03:52:30 | D | - best error = [ 5877.8683, 5877.8683, 5877.8683, 5877.8683, 5877.8683] +25-08-31 03:52:30 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:52:30 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:52:30 | D | - sum error = [ 6438.9036, 6351.8341, 6288.2370, 6218.8880, 6166.3957] +25-08-31 03:52:30 | D | - best error = [ 5877.8683, 5877.8683, 5877.8683, 5877.8683, 5877.8683] +25-08-31 03:52:30 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:52:30 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:52:30 | D | - sum error = [ 6114.5335, 6073.1376, 6174.5944, 5996.5330, 5967.2285] +25-08-31 03:52:30 | D | - best error = [ 5877.8683, 5877.8683, 5877.8683, 5877.8683, 5877.8683] +25-08-31 03:52:30 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:52:30 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:52:30 | D | - sum error = [ 5936.2126, 5938.1364, 5940.5639, 5948.5551, 5965.1562] +25-08-31 03:52:30 | D | - best error = [ 5877.8683, 5877.8683, 5877.8683, 5877.8683, 5877.8683] +25-08-31 03:52:30 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:52:30 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:52:30 | D | - sum error = [ 5991.3725, 6029.9323, 6104.9554, 6131.5483] +25-08-31 03:52:30 | D | - best error = [ 5877.8683, 5877.8683, 5877.8683, 5877.8683] +25-08-31 03:52:30 | D | + error = 5877.8683 +25-08-31 03:52:30 | D | + scale = [min=0.0090, max=2.5616] +25-08-31 03:52:30 | D | - transformer_blocks.4.ff_context.up_proj +25-08-31 03:52:30 | D | + w: sfp4_e2m1_all +25-08-31 03:52:30 | D | + x: sfp4_e2m1_all +25-08-31 03:52:30 | D | + y: None +25-08-31 03:52:30 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:52:30 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 03:52:30 | D | + x - AbsMax +25-08-31 03:52:30 | D | + x = [min=0.1162, max=10.3750] +25-08-31 03:52:30 | D | + w - AbsMax +25-08-31 03:52:30 | D | + w = [min=0.0854, max=0.6484] +25-08-31 03:52:30 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:52:31 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:53:15 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:53:15 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:53:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:53:15 | D | - sum error = [ 2987.8004, 2928.1318, 2880.0802, 2830.6429, 2799.4649] +25-08-31 03:53:15 | D | - best error = [ 2987.8004, 2928.1318, 2880.0802, 2830.6429, 2799.4649] +25-08-31 03:53:15 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:53:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:53:15 | D | - sum error = [ 2757.7565, 2745.8694, 2707.3762, 2693.8663, 2686.2074] +25-08-31 03:53:15 | D | - best error = [ 2757.7565, 2745.8694, 2707.3762, 2693.8663, 2686.2074] +25-08-31 03:53:15 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:53:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:53:15 | D | - sum error = [ 2692.3033, 2697.0466, 2689.7944, 2705.3743, 2718.5247] +25-08-31 03:53:15 | D | - best error = [ 2686.2074, 2686.2074, 2686.2074, 2686.2074, 2686.2074] +25-08-31 03:53:15 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:53:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:53:15 | D | - sum error = [ 2721.3370, 2742.2965, 2754.3367, 2796.0809, 2812.3538] +25-08-31 03:53:15 | D | - best error = [ 2686.2074, 2686.2074, 2686.2074, 2686.2074, 2686.2074] +25-08-31 03:53:15 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:53:15 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:53:15 | D | - sum error = [ 2980.5756, 2923.4536, 2866.6402, 2832.2626, 2805.7424] +25-08-31 03:53:15 | D | - best error = [ 2686.2074, 2686.2074, 2686.2074, 2686.2074, 2686.2074] +25-08-31 03:53:15 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:53:15 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:53:15 | D | - sum error = [ 2759.6449, 2743.2460, 2716.5889, 2701.1738, 2680.1855] +25-08-31 03:53:15 | D | - best error = [ 2686.2074, 2686.2074, 2686.2074, 2686.2074, 2680.1855] +25-08-31 03:53:15 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:53:15 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:53:15 | D | - sum error = [ 2673.0128, 2681.0845, 2691.7723, 2702.7497, 2707.9522] +25-08-31 03:53:15 | D | - best error = [ 2673.0128, 2673.0128, 2673.0128, 2673.0128, 2673.0128] +25-08-31 03:53:15 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:53:15 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:53:15 | D | - sum error = [ 2743.6939, 2752.0461, 2772.3910, 2816.4983] +25-08-31 03:53:15 | D | - best error = [ 2673.0128, 2673.0128, 2673.0128, 2673.0128] +25-08-31 03:53:15 | D | + error = 2673.0128 +25-08-31 03:53:15 | D | + scale = [min=0.6192, max=9.1339] +25-08-31 03:53:15 | D | - transformer_blocks.4.ff_context.down_proj +25-08-31 03:53:15 | D | + w: sfp4_e2m1_all +25-08-31 03:53:15 | D | + x: sfp4_e2m1_all +25-08-31 03:53:15 | D | + y: None +25-08-31 03:53:15 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:53:15 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 03:53:16 | D | + x - AbsMax +25-08-31 03:53:16 | D | + x = [min=0.0000, max=36.5000] +25-08-31 03:53:16 | D | + w - AbsMax +25-08-31 03:53:16 | D | + w = [min=0.0227, max=0.5391] +25-08-31 03:53:16 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:53:16 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:54:06 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:54:06 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:54:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:54:06 | D | - sum error = [ 584.9552, 563.4080, 544.0449, 536.9064, 526.4745] +25-08-31 03:54:06 | D | - best error = [ 584.9552, 563.4080, 544.0449, 536.9064, 526.4745] +25-08-31 03:54:06 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:54:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:54:06 | D | - sum error = [ 517.0801, 513.2954, 510.8053, 503.3115, 502.2376] +25-08-31 03:54:06 | D | - best error = [ 517.0801, 513.2954, 510.8053, 503.3115, 502.2376] +25-08-31 03:54:06 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:54:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:54:06 | D | - sum error = [ 512.1767, 505.4889, 507.1565, 510.4673, 514.0226] +25-08-31 03:54:06 | D | - best error = [ 502.2376, 502.2376, 502.2376, 502.2376, 502.2376] +25-08-31 03:54:06 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:54:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:54:06 | D | - sum error = [ 521.6685, 533.5693, 543.2048, 558.4704, 573.4944] +25-08-31 03:54:06 | D | - best error = [ 502.2376, 502.2376, 502.2376, 502.2376, 502.2376] +25-08-31 03:54:06 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:54:06 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:54:06 | D | - sum error = [ 647.4660, 631.0364, 615.7539, 601.0326, 591.7331] +25-08-31 03:54:06 | D | - best error = [ 502.2376, 502.2376, 502.2376, 502.2376, 502.2376] +25-08-31 03:54:06 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:54:06 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:54:06 | D | - sum error = [ 582.7001, 573.8638, 569.1646, 565.0348, 563.7902] +25-08-31 03:54:06 | D | - best error = [ 502.2376, 502.2376, 502.2376, 502.2376, 502.2376] +25-08-31 03:54:06 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:54:06 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:54:06 | D | - sum error = [ 560.3712, 557.9312, 560.1054, 560.4558, 560.8772] +25-08-31 03:54:06 | D | - best error = [ 502.2376, 502.2376, 502.2376, 502.2376, 502.2376] +25-08-31 03:54:06 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:54:06 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:54:06 | D | - sum error = [ 563.3308, 570.8122, 575.4240, 579.9224] +25-08-31 03:54:06 | D | - best error = [ 502.2376, 502.2376, 502.2376, 502.2376] +25-08-31 03:54:06 | D | + error = 502.2376 +25-08-31 03:54:06 | D | + scale = [min=0.0008, max=5.0470] +25-08-31 03:54:26 | D | - Smoothing Diffusion Block transformer_blocks.5 +25-08-31 03:54:26 | D | - Skipping Module transformer_blocks.5.norm1.linear +25-08-31 03:54:26 | D | - Skipping Module transformer_blocks.5.norm1_context.linear +25-08-31 03:54:26 | D | - Smoothing Transformer Block transformer_blocks.5 +25-08-31 03:54:26 | D | - transformer_blocks.5.attn.qkv_proj +25-08-31 03:54:26 | D | + w: sfp4_e2m1_all +25-08-31 03:54:26 | D | + x: sfp4_e2m1_all +25-08-31 03:54:26 | D | + y: None +25-08-31 03:54:26 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:54:26 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 03:54:26 | D | + x - AbsMax +25-08-31 03:54:26 | D | + x = [min=0.0381, max=18.3750] +25-08-31 03:54:26 | D | + w - AbsMax +25-08-31 03:54:26 | D | + w = [min=0.1069, max=0.8477] +25-08-31 03:54:26 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:54:27 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:55:42 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:55:42 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:55:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:55:42 | D | - sum error = [ 2610.1669, 2543.9081, 2478.5938, 2492.7048, 2420.6241] +25-08-31 03:55:42 | D | - best error = [ 2610.1669, 2543.9081, 2478.5938, 2478.5938, 2420.6241] +25-08-31 03:55:42 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:55:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:55:42 | D | - sum error = [ 2411.7530, 2388.0323, 2369.2494, 2413.2468, 2355.4024] +25-08-31 03:55:42 | D | - best error = [ 2411.7530, 2388.0323, 2369.2494, 2369.2494, 2355.4024] +25-08-31 03:55:42 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:55:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:55:42 | D | - sum error = [ 2395.1376, 2429.0701, 2452.4848, 2488.2148, 2556.4013] +25-08-31 03:55:42 | D | - best error = [ 2355.4024, 2355.4024, 2355.4024, 2355.4024, 2355.4024] +25-08-31 03:55:42 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:55:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:55:42 | D | - sum error = [ 2686.2935, 2791.6022, 2940.8043, 3082.5879, 3220.6010] +25-08-31 03:55:42 | D | - best error = [ 2355.4024, 2355.4024, 2355.4024, 2355.4024, 2355.4024] +25-08-31 03:55:42 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:55:42 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:55:42 | D | - sum error = [ 4172.3408, 3815.3257, 3580.0201, 3420.2007, 3257.3557] +25-08-31 03:55:42 | D | - best error = [ 2355.4024, 2355.4024, 2355.4024, 2355.4024, 2355.4024] +25-08-31 03:55:42 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:55:42 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:55:42 | D | - sum error = [ 3114.3006, 2994.2613, 2919.7527, 2897.2249, 2834.5671] +25-08-31 03:55:42 | D | - best error = [ 2355.4024, 2355.4024, 2355.4024, 2355.4024, 2355.4024] +25-08-31 03:55:42 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:55:42 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:55:42 | D | - sum error = [ 2825.6463, 2761.1822, 2769.2321, 2818.3962, 2856.6297] +25-08-31 03:55:42 | D | - best error = [ 2355.4024, 2355.4024, 2355.4024, 2355.4024, 2355.4024] +25-08-31 03:55:42 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:55:42 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:55:42 | D | - sum error = [ 2916.8450, 2957.9866, 3082.9612, 3220.7220] +25-08-31 03:55:42 | D | - best error = [ 2355.4024, 2355.4024, 2355.4024, 2355.4024] +25-08-31 03:55:42 | D | + error = 2355.4024 +25-08-31 03:55:42 | D | + scale = [min=0.2298, max=3.7060] +25-08-31 03:55:42 | D | - transformer_blocks.5.attn add_qkv_proj +25-08-31 03:55:42 | D | + w: sfp4_e2m1_all +25-08-31 03:55:42 | D | + x: sfp4_e2m1_all +25-08-31 03:55:42 | D | + y: None +25-08-31 03:55:42 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:55:42 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 03:55:42 | D | + x - AbsMax +25-08-31 03:55:42 | D | + x = [min=0.0898, max=24.5000] +25-08-31 03:55:42 | D | + w - AbsMax +25-08-31 03:55:42 | D | + w = [min=0.1118, max=0.3809] +25-08-31 03:55:42 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:55:43 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:56:48 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:56:48 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:56:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:56:48 | D | - sum error = [ 555.6804, 537.3917, 513.9298, 495.0059, 486.2171] +25-08-31 03:56:48 | D | - best error = [ 555.6804, 537.3917, 513.9298, 495.0059, 486.2171] +25-08-31 03:56:48 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:56:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:56:48 | D | - sum error = [ 480.6583, 465.6259, 450.6734, 445.9302, 449.9582] +25-08-31 03:56:48 | D | - best error = [ 480.6583, 465.6259, 450.6734, 445.9302, 445.9302] +25-08-31 03:56:48 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:56:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:56:48 | D | - sum error = [ 451.8710, 446.3748, 455.7158, 453.5828, 467.7474] +25-08-31 03:56:48 | D | - best error = [ 445.9302, 445.9302, 445.9302, 445.9302, 445.9302] +25-08-31 03:56:48 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:56:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:56:48 | D | - sum error = [ 473.1204, 489.0578, 498.0039, 528.8713, 562.7446] +25-08-31 03:56:48 | D | - best error = [ 445.9302, 445.9302, 445.9302, 445.9302, 445.9302] +25-08-31 03:56:48 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:56:48 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:56:48 | D | - sum error = [ 635.2309, 625.6310, 594.8318, 562.5459, 548.6031] +25-08-31 03:56:48 | D | - best error = [ 445.9302, 445.9302, 445.9302, 445.9302, 445.9302] +25-08-31 03:56:48 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:56:48 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:56:48 | D | - sum error = [ 532.7714, 500.9488, 495.3647, 466.8315, 465.4609] +25-08-31 03:56:48 | D | - best error = [ 445.9302, 445.9302, 445.9302, 445.9302, 445.9302] +25-08-31 03:56:48 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:56:48 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:56:48 | D | - sum error = [ 456.1771, 450.0969, 454.4321, 470.7339, 480.8933] +25-08-31 03:56:48 | D | - best error = [ 445.9302, 445.9302, 445.9302, 445.9302, 445.9302] +25-08-31 03:56:48 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:56:48 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:56:48 | D | - sum error = [ 488.2768, 511.1164, 524.3605, 548.4152] +25-08-31 03:56:48 | D | - best error = [ 445.9302, 445.9302, 445.9302, 445.9302] +25-08-31 03:56:48 | D | + error = 445.9302 +25-08-31 03:56:48 | D | + scale = [min=0.3814, max=3.5947] +25-08-31 03:56:48 | D | - transformer_blocks.5.attn.out_proj + transformer_blocks.5.attn.add_out_proj +25-08-31 03:56:48 | D | + w: sfp4_e2m1_all +25-08-31 03:56:48 | D | + x: sfp4_e2m1_all +25-08-31 03:56:48 | D | + y: None +25-08-31 03:56:48 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:56:48 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 03:56:49 | D | + x - AbsMax +25-08-31 03:56:49 | D | + x = [min=0.8242, max=8.7500] +25-08-31 03:56:49 | D | + w - AbsMax +25-08-31 03:56:49 | D | + w = [min=0.1216, max=0.3066] +25-08-31 03:56:49 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:56:50 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:58:37 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:58:37 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:58:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:58:37 | D | - sum error = [ 4203.1355, 4216.5965, 4208.8291, 4208.5734, 4205.9070] +25-08-31 03:58:37 | D | - best error = [ 4203.1355, 4203.1355, 4203.1355, 4203.1355, 4203.1355] +25-08-31 03:58:37 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:58:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:58:37 | D | - sum error = [ 4206.8580, 4202.2154, 4213.8953, 4212.2725, 4225.3885] +25-08-31 03:58:37 | D | - best error = [ 4203.1355, 4202.2154, 4202.2154, 4202.2154, 4202.2154] +25-08-31 03:58:37 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:58:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:58:37 | D | - sum error = [ 4231.2022, 4235.4431, 4241.3342, 4250.2701, 4260.4605] +25-08-31 03:58:37 | D | - best error = [ 4202.2154, 4202.2154, 4202.2154, 4202.2154, 4202.2154] +25-08-31 03:58:37 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:58:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:58:37 | D | - sum error = [ 4271.3366, 4288.7469, 4295.0998, 4308.5046, 4319.0744] +25-08-31 03:58:37 | D | - best error = [ 4202.2154, 4202.2154, 4202.2154, 4202.2154, 4202.2154] +25-08-31 03:58:37 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:58:37 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:58:37 | D | - sum error = [ 4248.8330, 4240.1625, 4234.7293, 4232.1505, 4231.8939] +25-08-31 03:58:37 | D | - best error = [ 4202.2154, 4202.2154, 4202.2154, 4202.2154, 4202.2154] +25-08-31 03:58:37 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:58:37 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:58:37 | D | - sum error = [ 4217.8288, 4224.0104, 4233.2177, 4227.3680, 4224.1406] +25-08-31 03:58:37 | D | - best error = [ 4202.2154, 4202.2154, 4202.2154, 4202.2154, 4202.2154] +25-08-31 03:58:37 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:58:37 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:58:37 | D | - sum error = [ 4232.9219, 4241.2927, 4247.3705, 4250.3753, 4277.0597] +25-08-31 03:58:37 | D | - best error = [ 4202.2154, 4202.2154, 4202.2154, 4202.2154, 4202.2154] +25-08-31 03:58:37 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:58:37 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:58:37 | D | - sum error = [ 4290.6445, 4304.3738, 4303.8072, 4323.1114] +25-08-31 03:58:37 | D | - best error = [ 4202.2154, 4202.2154, 4202.2154, 4202.2154] +25-08-31 03:58:37 | D | + error = 4202.2154 +25-08-31 03:58:37 | D | + scale = [min=0.9437, max=1.9169] +25-08-31 03:58:38 | D | - transformer_blocks.5.ff.up_proj +25-08-31 03:58:38 | D | + w: sfp4_e2m1_all +25-08-31 03:58:38 | D | + x: sfp4_e2m1_all +25-08-31 03:58:38 | D | + y: None +25-08-31 03:58:38 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:58:38 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 03:58:39 | D | + x - AbsMax +25-08-31 03:58:39 | D | + x = [min=0.0659, max=22.2500] +25-08-31 03:58:39 | D | + w - AbsMax +25-08-31 03:58:39 | D | + w = [min=0.1113, max=0.6328] +25-08-31 03:58:39 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 03:58:40 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 03:59:58 | D | - x / w range = AbsMax / AbsMax +25-08-31 03:59:58 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 03:59:58 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:59:58 | D | - sum error = [ 8092.8818, 7866.6595, 7675.0953, 7529.2417, 7396.8412] +25-08-31 03:59:58 | D | - best error = [ 8092.8818, 7866.6595, 7675.0953, 7529.2417, 7396.8412] +25-08-31 03:59:58 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 03:59:58 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:59:58 | D | - sum error = [ 7288.8762, 7232.9432, 7171.9562, 7141.4119, 7122.1890] +25-08-31 03:59:58 | D | - best error = [ 7288.8762, 7232.9432, 7171.9562, 7141.4119, 7122.1890] +25-08-31 03:59:58 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 03:59:58 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:59:58 | D | - sum error = [ 7120.2539, 7144.1117, 7181.1292, 7226.1016, 7314.2893] +25-08-31 03:59:58 | D | - best error = [ 7120.2539, 7120.2539, 7120.2539, 7120.2539, 7120.2539] +25-08-31 03:59:58 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:59:58 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 03:59:58 | D | - sum error = [ 7413.5249, 7532.9792, 7720.0705, 7922.4102, 8182.3569] +25-08-31 03:59:58 | D | - best error = [ 7120.2539, 7120.2539, 7120.2539, 7120.2539, 7120.2539] +25-08-31 03:59:58 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 03:59:58 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 03:59:58 | D | - sum error = [ 8754.7729, 8421.5699, 8149.2417, 7901.1252, 7714.2310] +25-08-31 03:59:58 | D | - best error = [ 7120.2539, 7120.2539, 7120.2539, 7120.2539, 7120.2539] +25-08-31 03:59:58 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 03:59:58 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 03:59:58 | D | - sum error = [ 7550.9610, 7412.0836, 7340.6948, 7256.5149, 7216.7486] +25-08-31 03:59:58 | D | - best error = [ 7120.2539, 7120.2539, 7120.2539, 7120.2539, 7120.2539] +25-08-31 03:59:58 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 03:59:58 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 03:59:58 | D | - sum error = [ 7198.9146, 7199.5194, 7235.1102, 7310.9887, 7401.9641] +25-08-31 03:59:58 | D | - best error = [ 7120.2539, 7120.2539, 7120.2539, 7120.2539, 7120.2539] +25-08-31 03:59:58 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 03:59:58 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 03:59:58 | D | - sum error = [ 7514.8762, 7687.5309, 7899.7408, 8143.1522] +25-08-31 03:59:58 | D | - best error = [ 7120.2539, 7120.2539, 7120.2539, 7120.2539] +25-08-31 03:59:58 | D | + error = 7120.2539 +25-08-31 03:59:58 | D | + scale = [min=0.2567, max=4.7170] +25-08-31 03:59:58 | D | - transformer_blocks.5.ff.down_proj +25-08-31 03:59:58 | D | + w: sfp4_e2m1_all +25-08-31 03:59:58 | D | + x: sfp4_e2m1_all +25-08-31 03:59:58 | D | + y: None +25-08-31 03:59:58 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 03:59:58 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 04:00:01 | D | + x - AbsMax +25-08-31 04:00:01 | D | + x = [min=0.0000, max=32.2500] +25-08-31 04:00:01 | D | + w - AbsMax +25-08-31 04:00:01 | D | + w = [min=0.0334, max=1.4688] +25-08-31 04:00:01 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 04:00:03 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 04:01:55 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:01:55 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:01:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:01:55 | D | - sum error = [ 5091.0822, 5058.1843, 5013.6905, 4991.3317, 4970.4656] +25-08-31 04:01:55 | D | - best error = [ 5091.0822, 5058.1843, 5013.6905, 4991.3317, 4970.4656] +25-08-31 04:01:55 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:01:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:01:55 | D | - sum error = [ 4935.5608, 4925.8701, 4914.3999, 4895.9316, 4911.2149] +25-08-31 04:01:55 | D | - best error = [ 4935.5608, 4925.8701, 4914.3999, 4895.9316, 4895.9316] +25-08-31 04:01:55 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:01:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:01:55 | D | - sum error = [ 4910.3635, 4914.1538, 4905.8624, 4927.0602, 4940.2704] +25-08-31 04:01:55 | D | - best error = [ 4895.9316, 4895.9316, 4895.9316, 4895.9316, 4895.9316] +25-08-31 04:01:55 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:01:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:01:55 | D | - sum error = [ 4952.3045, 4982.7647, 5011.9060, 5046.7172, 5079.8020] +25-08-31 04:01:55 | D | - best error = [ 4895.9316, 4895.9316, 4895.9316, 4895.9316, 4895.9316] +25-08-31 04:01:55 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:01:55 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:01:55 | D | - sum error = [ 5504.9154, 5399.8515, 5340.8881, 5287.3387, 5239.4303] +25-08-31 04:01:55 | D | - best error = [ 4895.9316, 4895.9316, 4895.9316, 4895.9316, 4895.9316] +25-08-31 04:01:55 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:01:55 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:01:55 | D | - sum error = [ 5163.8991, 5128.2835, 5077.1791, 5046.1197, 5020.8816] +25-08-31 04:01:55 | D | - best error = [ 4895.9316, 4895.9316, 4895.9316, 4895.9316, 4895.9316] +25-08-31 04:01:55 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:01:55 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:01:55 | D | - sum error = [ 4991.4513, 4971.8164, 4963.5785, 4952.9644, 4967.6443] +25-08-31 04:01:55 | D | - best error = [ 4895.9316, 4895.9316, 4895.9316, 4895.9316, 4895.9316] +25-08-31 04:01:55 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:01:55 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:01:55 | D | - sum error = [ 4968.1276, 4988.2500, 5026.4954, 5077.6762] +25-08-31 04:01:55 | D | - best error = [ 4895.9316, 4895.9316, 4895.9316, 4895.9316] +25-08-31 04:01:55 | D | + error = 4895.9316 +25-08-31 04:01:55 | D | + scale = [min=0.0019, max=4.0125] +25-08-31 04:01:55 | D | - transformer_blocks.5.ff_context.up_proj +25-08-31 04:01:55 | D | + w: sfp4_e2m1_all +25-08-31 04:01:55 | D | + x: sfp4_e2m1_all +25-08-31 04:01:55 | D | + y: None +25-08-31 04:01:55 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:01:55 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 04:01:55 | D | + x - AbsMax +25-08-31 04:01:55 | D | + x = [min=0.1621, max=20.0000] +25-08-31 04:01:55 | D | + w - AbsMax +25-08-31 04:01:55 | D | + w = [min=0.0859, max=0.5430] +25-08-31 04:01:55 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 04:01:56 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 04:02:39 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:02:39 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:02:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:02:39 | D | - sum error = [ 2811.2503, 2758.1923, 2703.1139, 2653.1104, 2608.8971] +25-08-31 04:02:39 | D | - best error = [ 2811.2503, 2758.1923, 2703.1139, 2653.1104, 2608.8971] +25-08-31 04:02:39 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:02:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:02:39 | D | - sum error = [ 2560.2947, 2536.4970, 2535.7147, 2490.1782, 2474.3166] +25-08-31 04:02:39 | D | - best error = [ 2560.2947, 2536.4970, 2535.7147, 2490.1782, 2474.3166] +25-08-31 04:02:39 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:02:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:02:39 | D | - sum error = [ 2473.8936, 2447.9476, 2448.3422, 2423.8582, 2416.2270] +25-08-31 04:02:39 | D | - best error = [ 2473.8936, 2447.9476, 2447.9476, 2423.8582, 2416.2270] +25-08-31 04:02:39 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:02:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:02:39 | D | - sum error = [ 2450.0279, 2456.9267, 2462.8247, 2492.5592, 2512.1498] +25-08-31 04:02:39 | D | - best error = [ 2416.2270, 2416.2270, 2416.2270, 2416.2270, 2416.2270] +25-08-31 04:02:39 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:02:39 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:02:39 | D | - sum error = [ 2696.6446, 2635.2848, 2587.1274, 2562.7767, 2530.9277] +25-08-31 04:02:39 | D | - best error = [ 2416.2270, 2416.2270, 2416.2270, 2416.2270, 2416.2270] +25-08-31 04:02:39 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:02:39 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:02:39 | D | - sum error = [ 2506.6773, 2477.6742, 2458.4262, 2477.5700, 2454.1412] +25-08-31 04:02:39 | D | - best error = [ 2416.2270, 2416.2270, 2416.2270, 2416.2270, 2416.2270] +25-08-31 04:02:39 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:02:39 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:02:39 | D | - sum error = [ 2445.4975, 2442.2613, 2446.7978, 2447.2162, 2460.7065] +25-08-31 04:02:39 | D | - best error = [ 2416.2270, 2416.2270, 2416.2270, 2416.2270, 2416.2270] +25-08-31 04:02:39 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:02:39 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:02:39 | D | - sum error = [ 2459.1470, 2481.3379, 2487.4505, 2518.1414] +25-08-31 04:02:39 | D | - best error = [ 2416.2270, 2416.2270, 2416.2270, 2416.2270] +25-08-31 04:02:39 | D | + error = 2416.2270 +25-08-31 04:02:39 | D | + scale = [min=0.2798, max=8.1418] +25-08-31 04:02:40 | D | - transformer_blocks.5.ff_context.down_proj +25-08-31 04:02:40 | D | + w: sfp4_e2m1_all +25-08-31 04:02:40 | D | + x: sfp4_e2m1_all +25-08-31 04:02:40 | D | + y: None +25-08-31 04:02:40 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:02:40 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 04:02:40 | D | + x - AbsMax +25-08-31 04:02:40 | D | + x = [min=0.0000, max=27.8750] +25-08-31 04:02:40 | D | + w - AbsMax +25-08-31 04:02:40 | D | + w = [min=0.0244, max=0.5547] +25-08-31 04:02:40 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 04:02:41 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 04:03:29 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:03:29 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:03:29 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:03:29 | D | - sum error = [ 601.7368, 585.8149, 574.7458, 570.9648, 558.2651] +25-08-31 04:03:29 | D | - best error = [ 601.7368, 585.8149, 574.7458, 570.9648, 558.2651] +25-08-31 04:03:29 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:03:29 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:03:29 | D | - sum error = [ 555.3007, 551.3971, 546.2345, 546.2135, 544.0942] +25-08-31 04:03:29 | D | - best error = [ 555.3007, 551.3971, 546.2345, 546.2135, 544.0942] +25-08-31 04:03:29 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:03:29 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:03:29 | D | - sum error = [ 545.5933, 550.7597, 554.9714, 562.1660, 571.3171] +25-08-31 04:03:29 | D | - best error = [ 544.0942, 544.0942, 544.0942, 544.0942, 544.0942] +25-08-31 04:03:29 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:03:29 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:03:29 | D | - sum error = [ 582.3494, 595.0604, 616.9133, 638.3647, 671.5925] +25-08-31 04:03:29 | D | - best error = [ 544.0942, 544.0942, 544.0942, 544.0942, 544.0942] +25-08-31 04:03:29 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:03:29 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:03:29 | D | - sum error = [ 675.1018, 656.2052, 672.4324, 631.7222, 621.4457] +25-08-31 04:03:29 | D | - best error = [ 544.0942, 544.0942, 544.0942, 544.0942, 544.0942] +25-08-31 04:03:29 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:03:29 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:03:29 | D | - sum error = [ 615.3423, 607.2067, 602.4778, 596.6570, 593.6498] +25-08-31 04:03:29 | D | - best error = [ 544.0942, 544.0942, 544.0942, 544.0942, 544.0942] +25-08-31 04:03:29 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:03:29 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:03:29 | D | - sum error = [ 592.1269, 592.4286, 594.2194, 599.5997, 606.3478] +25-08-31 04:03:29 | D | - best error = [ 544.0942, 544.0942, 544.0942, 544.0942, 544.0942] +25-08-31 04:03:29 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:03:29 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:03:29 | D | - sum error = [ 615.4936, 629.7920, 647.5450, 673.1228] +25-08-31 04:03:29 | D | - best error = [ 544.0942, 544.0942, 544.0942, 544.0942] +25-08-31 04:03:29 | D | + error = 544.0942 +25-08-31 04:03:29 | D | + scale = [min=0.0008, max=4.4704] +25-08-31 04:03:49 | D | - Smoothing Diffusion Block transformer_blocks.6 +25-08-31 04:03:49 | D | - Skipping Module transformer_blocks.6.norm1.linear +25-08-31 04:03:49 | D | - Skipping Module transformer_blocks.6.norm1_context.linear +25-08-31 04:03:49 | D | - Smoothing Transformer Block transformer_blocks.6 +25-08-31 04:03:49 | D | - transformer_blocks.6.attn.qkv_proj +25-08-31 04:03:49 | D | + w: sfp4_e2m1_all +25-08-31 04:03:49 | D | + x: sfp4_e2m1_all +25-08-31 04:03:49 | D | + y: None +25-08-31 04:03:49 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:03:49 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 04:03:49 | D | + x - AbsMax +25-08-31 04:03:49 | D | + x = [min=0.0442, max=16.8750] +25-08-31 04:03:49 | D | + w - AbsMax +25-08-31 04:03:49 | D | + w = [min=0.1001, max=0.5664] +25-08-31 04:03:49 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 04:03:50 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 04:05:03 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:05:03 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:05:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:05:03 | D | - sum error = [ 2313.3014, 2221.2258, 2200.6313, 2190.3691, 2145.3631] +25-08-31 04:05:03 | D | - best error = [ 2313.3014, 2221.2258, 2200.6313, 2190.3691, 2145.3631] +25-08-31 04:05:03 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:05:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:05:03 | D | - sum error = [ 2137.3468, 2173.8368, 2149.8099, 2169.0236, 2231.2796] +25-08-31 04:05:03 | D | - best error = [ 2137.3468, 2137.3468, 2137.3468, 2137.3468, 2137.3468] +25-08-31 04:05:03 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:05:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:05:03 | D | - sum error = [ 2144.3816, 2225.4687, 2284.8127, 2344.1104, 2352.6473] +25-08-31 04:05:03 | D | - best error = [ 2137.3468, 2137.3468, 2137.3468, 2137.3468, 2137.3468] +25-08-31 04:05:03 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:05:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:05:03 | D | - sum error = [ 2429.4188, 2590.7982, 2652.5679, 2774.7040, 2925.2569] +25-08-31 04:05:03 | D | - best error = [ 2137.3468, 2137.3468, 2137.3468, 2137.3468, 2137.3468] +25-08-31 04:05:03 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:05:03 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:05:03 | D | - sum error = [ 3322.3310, 3180.9337, 3045.8764, 2940.8317, 2863.2028] +25-08-31 04:05:03 | D | - best error = [ 2137.3468, 2137.3468, 2137.3468, 2137.3468, 2137.3468] +25-08-31 04:05:03 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:05:03 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:05:03 | D | - sum error = [ 2811.5796, 2686.7252, 2666.2700, 2621.1720, 2619.5339] +25-08-31 04:05:03 | D | - best error = [ 2137.3468, 2137.3468, 2137.3468, 2137.3468, 2137.3468] +25-08-31 04:05:03 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:05:03 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:05:03 | D | - sum error = [ 2584.7735, 2574.3986, 2527.9510, 2613.4544, 2591.0360] +25-08-31 04:05:03 | D | - best error = [ 2137.3468, 2137.3468, 2137.3468, 2137.3468, 2137.3468] +25-08-31 04:05:03 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:05:03 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:05:03 | D | - sum error = [ 2668.2299, 2725.7353, 2851.1001, 2943.7041] +25-08-31 04:05:03 | D | - best error = [ 2137.3468, 2137.3468, 2137.3468, 2137.3468] +25-08-31 04:05:03 | D | + error = 2137.3468 +25-08-31 04:05:03 | D | + scale = [min=0.4585, max=2.0268] +25-08-31 04:05:03 | D | - transformer_blocks.6.attn add_qkv_proj +25-08-31 04:05:03 | D | + w: sfp4_e2m1_all +25-08-31 04:05:03 | D | + x: sfp4_e2m1_all +25-08-31 04:05:03 | D | + y: None +25-08-31 04:05:03 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:05:03 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 04:05:04 | D | + x - AbsMax +25-08-31 04:05:04 | D | + x = [min=0.1289, max=19.3750] +25-08-31 04:05:04 | D | + w - AbsMax +25-08-31 04:05:04 | D | + w = [min=0.1143, max=0.5195] +25-08-31 04:05:04 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 04:05:05 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 04:06:07 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:06:07 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:06:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:06:07 | D | - sum error = [ 968.5179, 967.3205, 943.8137, 911.8698, 889.7386] +25-08-31 04:06:07 | D | - best error = [ 968.5179, 967.3205, 943.8137, 911.8698, 889.7386] +25-08-31 04:06:07 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:06:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:06:07 | D | - sum error = [ 885.0997, 926.1279, 868.5936, 868.3427, 876.9573] +25-08-31 04:06:07 | D | - best error = [ 885.0997, 885.0997, 868.5936, 868.3427, 868.3427] +25-08-31 04:06:07 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:06:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:06:07 | D | - sum error = [ 862.0327, 840.6350, 878.6075, 868.5567, 889.2648] +25-08-31 04:06:07 | D | - best error = [ 862.0327, 840.6350, 840.6350, 840.6350, 840.6350] +25-08-31 04:06:07 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:06:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:06:07 | D | - sum error = [ 929.1098, 910.2454, 956.7055, 969.0482, 956.3131] +25-08-31 04:06:07 | D | - best error = [ 840.6350, 840.6350, 840.6350, 840.6350, 840.6350] +25-08-31 04:06:07 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:06:07 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:06:07 | D | - sum error = [ 1070.5734, 1025.6380, 971.0144, 974.6798, 942.2221] +25-08-31 04:06:07 | D | - best error = [ 840.6350, 840.6350, 840.6350, 840.6350, 840.6350] +25-08-31 04:06:07 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:06:07 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:06:07 | D | - sum error = [ 914.8561, 912.1556, 898.8927, 894.0192, 894.3073] +25-08-31 04:06:07 | D | - best error = [ 840.6350, 840.6350, 840.6350, 840.6350, 840.6350] +25-08-31 04:06:07 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:06:07 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:06:07 | D | - sum error = [ 921.4324, 862.3375, 881.2412, 880.4382, 915.9433] +25-08-31 04:06:07 | D | - best error = [ 840.6350, 840.6350, 840.6350, 840.6350, 840.6350] +25-08-31 04:06:07 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:06:07 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:06:07 | D | - sum error = [ 912.9037, 936.5818, 951.4108, 964.0069] +25-08-31 04:06:07 | D | - best error = [ 840.6350, 840.6350, 840.6350, 840.6350] +25-08-31 04:06:07 | D | + error = 840.6350 +25-08-31 04:06:07 | D | + scale = [min=0.3241, max=5.1048] +25-08-31 04:06:07 | D | - transformer_blocks.6.attn.out_proj + transformer_blocks.6.attn.add_out_proj +25-08-31 04:06:07 | D | + w: sfp4_e2m1_all +25-08-31 04:06:07 | D | + x: sfp4_e2m1_all +25-08-31 04:06:07 | D | + y: None +25-08-31 04:06:07 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:06:07 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 04:06:08 | D | + x - AbsMax +25-08-31 04:06:08 | D | + x = [min=0.9961, max=12.0000] +25-08-31 04:06:08 | D | + w - AbsMax +25-08-31 04:06:08 | D | + w = [min=0.1226, max=0.4727] +25-08-31 04:06:08 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 04:06:09 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 04:07:54 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:07:54 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:07:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:07:54 | D | - sum error = [ 4025.2621, 4023.5880, 4023.5042, 4022.2612, 4019.9025] +25-08-31 04:07:54 | D | - best error = [ 4025.2621, 4023.5880, 4023.5042, 4022.2612, 4019.9025] +25-08-31 04:07:54 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:07:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:07:54 | D | - sum error = [ 4022.9786, 4029.9529, 4025.1810, 4036.4726, 4047.9118] +25-08-31 04:07:54 | D | - best error = [ 4019.9025, 4019.9025, 4019.9025, 4019.9025, 4019.9025] +25-08-31 04:07:54 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:07:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:07:54 | D | - sum error = [ 4054.0048, 4078.3986, 4093.3488, 4112.1919, 4135.8554] +25-08-31 04:07:54 | D | - best error = [ 4019.9025, 4019.9025, 4019.9025, 4019.9025, 4019.9025] +25-08-31 04:07:54 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:07:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:07:54 | D | - sum error = [ 4139.6804, 4165.1956, 4203.4031, 4213.4736, 4244.2929] +25-08-31 04:07:54 | D | - best error = [ 4019.9025, 4019.9025, 4019.9025, 4019.9025, 4019.9025] +25-08-31 04:07:54 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:07:54 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:07:54 | D | - sum error = [ 4066.6377, 4057.9566, 4046.0378, 4027.8221, 4029.8811] +25-08-31 04:07:54 | D | - best error = [ 4019.9025, 4019.9025, 4019.9025, 4019.9025, 4019.9025] +25-08-31 04:07:54 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:07:54 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:07:54 | D | - sum error = [ 4040.6108, 4049.8254, 4048.7501, 4058.0010, 4058.2953] +25-08-31 04:07:54 | D | - best error = [ 4019.9025, 4019.9025, 4019.9025, 4019.9025, 4019.9025] +25-08-31 04:07:54 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:07:54 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:07:54 | D | - sum error = [ 4071.4456, 4075.7831, 4086.5718, 4108.5423, 4133.5203] +25-08-31 04:07:54 | D | - best error = [ 4019.9025, 4019.9025, 4019.9025, 4019.9025, 4019.9025] +25-08-31 04:07:54 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:07:54 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:07:54 | D | - sum error = [ 4151.4275, 4190.4279, 4198.9930, 4227.6679] +25-08-31 04:07:54 | D | - best error = [ 4019.9025, 4019.9025, 4019.9025, 4019.9025] +25-08-31 04:07:54 | D | + error = 4019.9025 +25-08-31 04:07:54 | D | + scale = [min=0.9992, max=1.6438] +25-08-31 04:07:54 | D | - transformer_blocks.6.ff.up_proj +25-08-31 04:07:54 | D | + w: sfp4_e2m1_all +25-08-31 04:07:54 | D | + x: sfp4_e2m1_all +25-08-31 04:07:54 | D | + y: None +25-08-31 04:07:54 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:07:54 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 04:07:55 | D | + x - AbsMax +25-08-31 04:07:55 | D | + x = [min=0.0532, max=8.0000] +25-08-31 04:07:55 | D | + w - AbsMax +25-08-31 04:07:55 | D | + w = [min=0.1089, max=0.5156] +25-08-31 04:07:55 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 04:07:56 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 04:09:10 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:09:10 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:09:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:09:10 | D | - sum error = [ 8206.0977, 8063.2867, 7956.7581, 7862.9928, 7778.2965] +25-08-31 04:09:10 | D | - best error = [ 8206.0977, 8063.2867, 7956.7581, 7862.9928, 7778.2965] +25-08-31 04:09:10 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:09:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:09:10 | D | - sum error = [ 7703.1334, 7664.5530, 7616.2920, 7621.6631, 7578.0662] +25-08-31 04:09:10 | D | - best error = [ 7703.1334, 7664.5530, 7616.2920, 7616.2920, 7578.0662] +25-08-31 04:09:10 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:09:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:09:10 | D | - sum error = [ 7570.2016, 7572.0230, 7608.3883, 7619.1333, 7663.1980] +25-08-31 04:09:10 | D | - best error = [ 7570.2016, 7570.2016, 7570.2016, 7570.2016, 7570.2016] +25-08-31 04:09:10 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:09:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:09:10 | D | - sum error = [ 7722.8187, 7781.8057, 7892.4622, 8011.0604, 8165.8996] +25-08-31 04:09:10 | D | - best error = [ 7570.2016, 7570.2016, 7570.2016, 7570.2016, 7570.2016] +25-08-31 04:09:10 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:09:10 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:09:10 | D | - sum error = [ 8488.0990, 8319.5298, 8167.6273, 8015.2899, 7909.0541] +25-08-31 04:09:10 | D | - best error = [ 7570.2016, 7570.2016, 7570.2016, 7570.2016, 7570.2016] +25-08-31 04:09:10 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:09:10 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:09:10 | D | - sum error = [ 7828.3559, 7749.2800, 7679.9620, 7653.0431, 7639.3696] +25-08-31 04:09:10 | D | - best error = [ 7570.2016, 7570.2016, 7570.2016, 7570.2016, 7570.2016] +25-08-31 04:09:10 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:09:10 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:09:10 | D | - sum error = [ 7620.6895, 7616.8314, 7639.4895, 7667.8863, 7710.0446] +25-08-31 04:09:10 | D | - best error = [ 7570.2016, 7570.2016, 7570.2016, 7570.2016, 7570.2016] +25-08-31 04:09:10 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:09:10 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:09:10 | D | - sum error = [ 7767.6359, 7877.9429, 7991.2294, 8150.0969] +25-08-31 04:09:10 | D | - best error = [ 7570.2016, 7570.2016, 7570.2016, 7570.2016] +25-08-31 04:09:10 | D | + error = 7570.2016 +25-08-31 04:09:10 | D | + scale = [min=0.2307, max=2.8284] +25-08-31 04:09:10 | D | - transformer_blocks.6.ff.down_proj +25-08-31 04:09:10 | D | + w: sfp4_e2m1_all +25-08-31 04:09:10 | D | + x: sfp4_e2m1_all +25-08-31 04:09:10 | D | + y: None +25-08-31 04:09:10 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:09:10 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 04:09:13 | D | + x - AbsMax +25-08-31 04:09:13 | D | + x = [min=0.0000, max=36.5000] +25-08-31 04:09:13 | D | + w - AbsMax +25-08-31 04:09:13 | D | + w = [min=0.0554, max=0.8047] +25-08-31 04:09:13 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 04:09:15 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 04:11:04 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:11:04 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:11:04 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:11:04 | D | - sum error = [ 5294.9822, 5282.4763, 5331.2458, 5279.0747, 5257.6567] +25-08-31 04:11:04 | D | - best error = [ 5294.9822, 5282.4763, 5282.4763, 5279.0747, 5257.6567] +25-08-31 04:11:04 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:11:04 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:11:04 | D | - sum error = [ 5254.4621, 5271.2107, 5268.5649, 5265.6426, 5294.0170] +25-08-31 04:11:04 | D | - best error = [ 5254.4621, 5254.4621, 5254.4621, 5254.4621, 5254.4621] +25-08-31 04:11:04 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:11:04 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:11:04 | D | - sum error = [ 5287.8430, 5303.5581, 5319.0294, 5327.8812, 5355.1924] +25-08-31 04:11:04 | D | - best error = [ 5254.4621, 5254.4621, 5254.4621, 5254.4621, 5254.4621] +25-08-31 04:11:04 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:11:04 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:11:04 | D | - sum error = [ 5386.9594, 5414.8309, 5434.7467, 5468.4093, 5511.8295] +25-08-31 04:11:04 | D | - best error = [ 5254.4621, 5254.4621, 5254.4621, 5254.4621, 5254.4621] +25-08-31 04:11:04 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:11:04 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:11:04 | D | - sum error = [ 5650.2280, 5599.2830, 5546.8460, 5504.3083, 5483.0482] +25-08-31 04:11:04 | D | - best error = [ 5254.4621, 5254.4621, 5254.4621, 5254.4621, 5254.4621] +25-08-31 04:11:04 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:11:04 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:11:04 | D | - sum error = [ 5454.7400, 5436.4442, 5407.8454, 5395.6638, 5388.6703] +25-08-31 04:11:04 | D | - best error = [ 5254.4621, 5254.4621, 5254.4621, 5254.4621, 5254.4621] +25-08-31 04:11:04 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:11:04 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:11:04 | D | - sum error = [ 5360.1470, 5354.9047, 5358.7857, 5367.2302, 5385.4556] +25-08-31 04:11:04 | D | - best error = [ 5254.4621, 5254.4621, 5254.4621, 5254.4621, 5254.4621] +25-08-31 04:11:04 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:11:04 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:11:04 | D | - sum error = [ 5401.8630, 5417.3927, 5462.0115, 5506.1813] +25-08-31 04:11:04 | D | - best error = [ 5254.4621, 5254.4621, 5254.4621, 5254.4621] +25-08-31 04:11:04 | D | + error = 5254.4621 +25-08-31 04:11:04 | D | + scale = [min=0.0197, max=2.4580] +25-08-31 04:11:04 | D | - transformer_blocks.6.ff_context.up_proj +25-08-31 04:11:04 | D | + w: sfp4_e2m1_all +25-08-31 04:11:04 | D | + x: sfp4_e2m1_all +25-08-31 04:11:04 | D | + y: None +25-08-31 04:11:04 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:11:04 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 04:11:04 | D | + x - AbsMax +25-08-31 04:11:04 | D | + x = [min=0.1338, max=14.7500] +25-08-31 04:11:04 | D | + w - AbsMax +25-08-31 04:11:04 | D | + w = [min=0.0820, max=0.6172] +25-08-31 04:11:04 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 04:11:05 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 04:11:48 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:11:48 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:11:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:11:48 | D | - sum error = [ 3158.0204, 3111.2093, 3058.9575, 3011.0181, 2970.0221] +25-08-31 04:11:48 | D | - best error = [ 3158.0204, 3111.2093, 3058.9575, 3011.0181, 2970.0221] +25-08-31 04:11:48 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:11:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:11:48 | D | - sum error = [ 2917.5090, 2911.0240, 2881.4822, 2872.2140, 2860.7384] +25-08-31 04:11:48 | D | - best error = [ 2917.5090, 2911.0240, 2881.4822, 2872.2140, 2860.7384] +25-08-31 04:11:48 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:11:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:11:48 | D | - sum error = [ 2859.8313, 2859.8830, 2855.7934, 2852.5725, 2853.6261] +25-08-31 04:11:48 | D | - best error = [ 2859.8313, 2859.8313, 2855.7934, 2852.5725, 2852.5725] +25-08-31 04:11:48 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:11:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:11:48 | D | - sum error = [ 2852.5945, 2858.5634, 2875.5299, 2896.8405, 2911.3578] +25-08-31 04:11:48 | D | - best error = [ 2852.5725, 2852.5725, 2852.5725, 2852.5725, 2852.5725] +25-08-31 04:11:48 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:11:48 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:11:48 | D | - sum error = [ 3030.3450, 2998.7551, 2959.4810, 2912.1444, 2878.1244] +25-08-31 04:11:48 | D | - best error = [ 2852.5725, 2852.5725, 2852.5725, 2852.5725, 2852.5725] +25-08-31 04:11:48 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:11:48 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:11:48 | D | - sum error = [ 2878.4337, 2856.5982, 2841.7491, 2863.6806, 2828.4907] +25-08-31 04:11:48 | D | - best error = [ 2852.5725, 2852.5725, 2841.7491, 2841.7491, 2828.4907] +25-08-31 04:11:48 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:11:48 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:11:48 | D | - sum error = [ 2834.7325, 2821.7889, 2821.5033, 2829.4554, 2831.8227] +25-08-31 04:11:48 | D | - best error = [ 2828.4907, 2821.7889, 2821.5033, 2821.5033, 2821.5033] +25-08-31 04:11:48 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:11:48 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:11:48 | D | - sum error = [ 2854.1232, 2911.9851, 2900.1228, 2913.2000] +25-08-31 04:11:48 | D | - best error = [ 2821.5033, 2821.5033, 2821.5033, 2821.5033] +25-08-31 04:11:48 | D | + error = 2821.5033 +25-08-31 04:11:48 | D | + scale = [min=0.5362, max=11.5682] +25-08-31 04:11:48 | D | - transformer_blocks.6.ff_context.down_proj +25-08-31 04:11:48 | D | + w: sfp4_e2m1_all +25-08-31 04:11:48 | D | + x: sfp4_e2m1_all +25-08-31 04:11:48 | D | + y: None +25-08-31 04:11:48 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:11:48 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 04:11:49 | D | + x - AbsMax +25-08-31 04:11:49 | D | + x = [min=0.0000, max=27.5000] +25-08-31 04:11:49 | D | + w - AbsMax +25-08-31 04:11:49 | D | + w = [min=0.0215, max=0.5273] +25-08-31 04:11:49 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 04:11:49 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 04:12:38 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:12:38 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:12:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:12:38 | D | - sum error = [ 643.7077, 623.6232, 609.2018, 597.3882, 586.1923] +25-08-31 04:12:38 | D | - best error = [ 643.7077, 623.6232, 609.2018, 597.3882, 586.1923] +25-08-31 04:12:38 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:12:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:12:38 | D | - sum error = [ 578.5733, 574.4599, 570.6030, 567.8051, 570.1715] +25-08-31 04:12:38 | D | - best error = [ 578.5733, 574.4599, 570.6030, 567.8051, 567.8051] +25-08-31 04:12:38 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:12:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:12:38 | D | - sum error = [ 565.8019, 568.0680, 574.1522, 577.3619, 583.0192] +25-08-31 04:12:38 | D | - best error = [ 565.8019, 565.8019, 565.8019, 565.8019, 565.8019] +25-08-31 04:12:38 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:12:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:12:38 | D | - sum error = [ 590.9967, 604.3663, 623.0998, 642.7760, 663.4142] +25-08-31 04:12:38 | D | - best error = [ 565.8019, 565.8019, 565.8019, 565.8019, 565.8019] +25-08-31 04:12:38 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:12:38 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:12:38 | D | - sum error = [ 706.5983, 685.0715, 671.3294, 656.9338, 646.9066] +25-08-31 04:12:38 | D | - best error = [ 565.8019, 565.8019, 565.8019, 565.8019, 565.8019] +25-08-31 04:12:38 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:12:38 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:12:38 | D | - sum error = [ 636.3843, 629.8702, 623.5694, 618.2775, 616.1887] +25-08-31 04:12:38 | D | - best error = [ 565.8019, 565.8019, 565.8019, 565.8019, 565.8019] +25-08-31 04:12:38 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:12:38 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:12:38 | D | - sum error = [ 613.2216, 611.4126, 611.5500, 617.7796, 620.0351] +25-08-31 04:12:38 | D | - best error = [ 565.8019, 565.8019, 565.8019, 565.8019, 565.8019] +25-08-31 04:12:38 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:12:38 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:12:38 | D | - sum error = [ 627.6043, 640.0291, 654.8931, 671.9944] +25-08-31 04:12:38 | D | - best error = [ 565.8019, 565.8019, 565.8019, 565.8019] +25-08-31 04:12:38 | D | + error = 565.8019 +25-08-31 04:12:38 | D | + scale = [min=0.0004, max=5.2440] +25-08-31 04:12:57 | D | - Smoothing Diffusion Block transformer_blocks.7 +25-08-31 04:12:57 | D | - Skipping Module transformer_blocks.7.norm1.linear +25-08-31 04:12:57 | D | - Skipping Module transformer_blocks.7.norm1_context.linear +25-08-31 04:12:57 | D | - Smoothing Transformer Block transformer_blocks.7 +25-08-31 04:12:57 | D | - transformer_blocks.7.attn.qkv_proj +25-08-31 04:12:57 | D | + w: sfp4_e2m1_all +25-08-31 04:12:57 | D | + x: sfp4_e2m1_all +25-08-31 04:12:57 | D | + y: None +25-08-31 04:12:57 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:12:57 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 04:12:58 | D | + x - AbsMax +25-08-31 04:12:58 | D | + x = [min=0.0457, max=17.3750] +25-08-31 04:12:58 | D | + w - AbsMax +25-08-31 04:12:58 | D | + w = [min=0.1138, max=0.8438] +25-08-31 04:12:58 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 04:12:59 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 04:14:13 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:14:13 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:14:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:14:13 | D | - sum error = [ 3046.4222, 2935.9818, 2890.0910, 2964.4038, 2899.4684] +25-08-31 04:14:13 | D | - best error = [ 3046.4222, 2935.9818, 2890.0910, 2890.0910, 2890.0910] +25-08-31 04:14:13 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:14:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:14:13 | D | - sum error = [ 2827.5193, 2856.0497, 2823.1641, 2899.0095, 2880.4814] +25-08-31 04:14:13 | D | - best error = [ 2827.5193, 2827.5193, 2823.1641, 2823.1641, 2823.1641] +25-08-31 04:14:13 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:14:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:14:13 | D | - sum error = [ 2797.8124, 2873.0771, 2819.9344, 2933.1381, 3028.8493] +25-08-31 04:14:13 | D | - best error = [ 2797.8124, 2797.8124, 2797.8124, 2797.8124, 2797.8124] +25-08-31 04:14:13 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:14:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:14:13 | D | - sum error = [ 2973.1907, 3070.5679, 3095.9286, 3310.2719, 3347.5926] +25-08-31 04:14:13 | D | - best error = [ 2797.8124, 2797.8124, 2797.8124, 2797.8124, 2797.8124] +25-08-31 04:14:13 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:14:13 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:14:13 | D | - sum error = [ 6281.9904, 5653.7207, 5232.7675, 4904.1223, 4593.0332] +25-08-31 04:14:13 | D | - best error = [ 2797.8124, 2797.8124, 2797.8124, 2797.8124, 2797.8124] +25-08-31 04:14:13 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:14:13 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:14:13 | D | - sum error = [ 4355.0660, 4269.5140, 3954.4606, 3841.3284, 3679.8808] +25-08-31 04:14:13 | D | - best error = [ 2797.8124, 2797.8124, 2797.8124, 2797.8124, 2797.8124] +25-08-31 04:14:13 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:14:13 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:14:13 | D | - sum error = [ 3619.0687, 3462.0247, 3311.9595, 3343.0408, 3305.6431] +25-08-31 04:14:13 | D | - best error = [ 2797.8124, 2797.8124, 2797.8124, 2797.8124, 2797.8124] +25-08-31 04:14:13 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:14:13 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:14:13 | D | - sum error = [ 3259.3234, 3321.7199, 3338.4396, 3391.4094] +25-08-31 04:14:13 | D | - best error = [ 2797.8124, 2797.8124, 2797.8124, 2797.8124] +25-08-31 04:14:13 | D | + error = 2797.8124 +25-08-31 04:14:13 | D | + scale = [min=0.2137, max=4.1683] +25-08-31 04:14:14 | D | - transformer_blocks.7.attn add_qkv_proj +25-08-31 04:14:14 | D | + w: sfp4_e2m1_all +25-08-31 04:14:14 | D | + x: sfp4_e2m1_all +25-08-31 04:14:14 | D | + y: None +25-08-31 04:14:14 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:14:14 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 04:14:14 | D | + x - AbsMax +25-08-31 04:14:14 | D | + x = [min=0.1147, max=29.0000] +25-08-31 04:14:14 | D | + w - AbsMax +25-08-31 04:14:14 | D | + w = [min=0.1069, max=0.4434] +25-08-31 04:14:14 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 04:14:15 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 04:15:24 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:15:24 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:15:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:15:24 | D | - sum error = [ 807.0525, 781.1464, 788.3603, 742.8318, 737.1430] +25-08-31 04:15:24 | D | - best error = [ 807.0525, 781.1464, 781.1464, 742.8318, 737.1430] +25-08-31 04:15:24 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:15:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:15:24 | D | - sum error = [ 703.3803, 678.7679, 672.9301, 643.7885, 668.2037] +25-08-31 04:15:24 | D | - best error = [ 703.3803, 678.7679, 672.9301, 643.7885, 643.7885] +25-08-31 04:15:24 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:15:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:15:24 | D | - sum error = [ 664.8064, 655.0084, 651.4116, 654.7476, 650.3053] +25-08-31 04:15:24 | D | - best error = [ 643.7885, 643.7885, 643.7885, 643.7885, 643.7885] +25-08-31 04:15:24 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:15:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:15:24 | D | - sum error = [ 676.7926, 674.5144, 699.2490, 695.5730, 733.4754] +25-08-31 04:15:24 | D | - best error = [ 643.7885, 643.7885, 643.7885, 643.7885, 643.7885] +25-08-31 04:15:24 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:15:24 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:15:24 | D | - sum error = [ 969.8667, 914.3241, 881.0559, 843.9874, 788.3549] +25-08-31 04:15:24 | D | - best error = [ 643.7885, 643.7885, 643.7885, 643.7885, 643.7885] +25-08-31 04:15:24 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:15:24 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:15:24 | D | - sum error = [ 775.2519, 743.9123, 705.3918, 701.6522, 674.3006] +25-08-31 04:15:24 | D | - best error = [ 643.7885, 643.7885, 643.7885, 643.7885, 643.7885] +25-08-31 04:15:24 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:15:24 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:15:24 | D | - sum error = [ 703.0571, 685.3221, 665.7649, 669.3877, 666.2740] +25-08-31 04:15:24 | D | - best error = [ 643.7885, 643.7885, 643.7885, 643.7885, 643.7885] +25-08-31 04:15:24 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:15:24 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:15:24 | D | - sum error = [ 677.2888, 703.4941, 724.0910, 747.8876] +25-08-31 04:15:24 | D | - best error = [ 643.7885, 643.7885, 643.7885, 643.7885] +25-08-31 04:15:24 | D | + error = 643.7885 +25-08-31 04:15:24 | D | + scale = [min=0.4206, max=3.8456] +25-08-31 04:15:24 | D | - transformer_blocks.7.attn.out_proj + transformer_blocks.7.attn.add_out_proj +25-08-31 04:15:24 | D | + w: sfp4_e2m1_all +25-08-31 04:15:24 | D | + x: sfp4_e2m1_all +25-08-31 04:15:24 | D | + y: None +25-08-31 04:15:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:15:24 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 04:15:25 | D | + x - AbsMax +25-08-31 04:15:25 | D | + x = [min=0.8594, max=10.5000] +25-08-31 04:15:25 | D | + w - AbsMax +25-08-31 04:15:25 | D | + w = [min=0.1235, max=0.3750] +25-08-31 04:15:25 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 04:15:26 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 04:17:19 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:17:19 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:17:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:17:19 | D | - sum error = [ 4274.2873, 4267.5518, 4272.8515, 4282.0936, 4277.7999] +25-08-31 04:17:19 | D | - best error = [ 4274.2873, 4267.5518, 4267.5518, 4267.5518, 4267.5518] +25-08-31 04:17:19 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:17:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:17:19 | D | - sum error = [ 4288.2969, 4303.0071, 4302.3085, 4311.9007, 4332.9967] +25-08-31 04:17:19 | D | - best error = [ 4267.5518, 4267.5518, 4267.5518, 4267.5518, 4267.5518] +25-08-31 04:17:19 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:17:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:17:19 | D | - sum error = [ 4330.3986, 4341.9698, 4357.9626, 4381.6125, 4395.4854] +25-08-31 04:17:19 | D | - best error = [ 4267.5518, 4267.5518, 4267.5518, 4267.5518, 4267.5518] +25-08-31 04:17:19 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:17:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:17:19 | D | - sum error = [ 4405.2846, 4425.3612, 4442.8878, 4451.3686, 4479.6214] +25-08-31 04:17:19 | D | - best error = [ 4267.5518, 4267.5518, 4267.5518, 4267.5518, 4267.5518] +25-08-31 04:17:19 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:17:19 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:17:19 | D | - sum error = [ 4333.8920, 4303.3199, 4305.8309, 4314.4318, 4310.0220] +25-08-31 04:17:19 | D | - best error = [ 4267.5518, 4267.5518, 4267.5518, 4267.5518, 4267.5518] +25-08-31 04:17:19 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:17:19 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:17:19 | D | - sum error = [ 4307.8700, 4315.7352, 4336.9771, 4343.4035, 4343.4157] +25-08-31 04:17:19 | D | - best error = [ 4267.5518, 4267.5518, 4267.5518, 4267.5518, 4267.5518] +25-08-31 04:17:19 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:17:19 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:17:19 | D | - sum error = [ 4355.3406, 4355.9456, 4377.2119, 4400.1786, 4402.7528] +25-08-31 04:17:19 | D | - best error = [ 4267.5518, 4267.5518, 4267.5518, 4267.5518, 4267.5518] +25-08-31 04:17:19 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:17:19 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:17:19 | D | - sum error = [ 4412.8836, 4440.7553, 4463.0402, 4478.7360] +25-08-31 04:17:19 | D | - best error = [ 4267.5518, 4267.5518, 4267.5518, 4267.5518] +25-08-31 04:17:19 | D | + error = 4267.5518 +25-08-31 04:17:19 | D | + scale = [min=0.9925, max=1.1248] +25-08-31 04:17:20 | D | - transformer_blocks.7.ff.up_proj +25-08-31 04:17:20 | D | + w: sfp4_e2m1_all +25-08-31 04:17:20 | D | + x: sfp4_e2m1_all +25-08-31 04:17:20 | D | + y: None +25-08-31 04:17:20 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:17:20 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 04:17:20 | D | + x - AbsMax +25-08-31 04:17:20 | D | + x = [min=0.0747, max=14.8750] +25-08-31 04:17:20 | D | + w - AbsMax +25-08-31 04:17:20 | D | + w = [min=0.1094, max=0.4375] +25-08-31 04:17:20 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 04:17:21 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 04:18:39 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:18:39 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:18:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:18:39 | D | - sum error = [ 7961.8505, 7874.3091, 7811.8041, 7747.8487, 7723.7667] +25-08-31 04:18:39 | D | - best error = [ 7961.8505, 7874.3091, 7811.8041, 7747.8487, 7723.7667] +25-08-31 04:18:39 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:18:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:18:39 | D | - sum error = [ 7663.3217, 7629.5966, 7606.5096, 7575.6787, 7574.7818] +25-08-31 04:18:39 | D | - best error = [ 7663.3217, 7629.5966, 7606.5096, 7575.6787, 7574.7818] +25-08-31 04:18:39 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:18:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:18:39 | D | - sum error = [ 7567.1839, 7569.8362, 7583.1571, 7610.0665, 7653.0781] +25-08-31 04:18:39 | D | - best error = [ 7567.1839, 7567.1839, 7567.1839, 7567.1839, 7567.1839] +25-08-31 04:18:39 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:18:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:18:39 | D | - sum error = [ 7677.1193, 7741.7905, 7811.4014, 7876.3608, 7987.2759] +25-08-31 04:18:39 | D | - best error = [ 7567.1839, 7567.1839, 7567.1839, 7567.1839, 7567.1839] +25-08-31 04:18:39 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:18:39 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:18:39 | D | - sum error = [ 8262.2016, 8145.7795, 8019.1521, 7920.3899, 7843.1687] +25-08-31 04:18:39 | D | - best error = [ 7567.1839, 7567.1839, 7567.1839, 7567.1839, 7567.1839] +25-08-31 04:18:39 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:18:39 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:18:39 | D | - sum error = [ 7765.6634, 7723.8206, 7679.5206, 7659.5449, 7625.7070] +25-08-31 04:18:39 | D | - best error = [ 7567.1839, 7567.1839, 7567.1839, 7567.1839, 7567.1839] +25-08-31 04:18:39 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:18:39 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:18:39 | D | - sum error = [ 7614.9138, 7609.4665, 7626.2526, 7651.2697, 7690.8767] +25-08-31 04:18:39 | D | - best error = [ 7567.1839, 7567.1839, 7567.1839, 7567.1839, 7567.1839] +25-08-31 04:18:39 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:18:39 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:18:39 | D | - sum error = [ 7743.1017, 7788.5341, 7864.5142, 7970.9033] +25-08-31 04:18:39 | D | - best error = [ 7567.1839, 7567.1839, 7567.1839, 7567.1839] +25-08-31 04:18:39 | D | + error = 7567.1839 +25-08-31 04:18:39 | D | + scale = [min=0.2733, max=3.8568] +25-08-31 04:18:39 | D | - transformer_blocks.7.ff.down_proj +25-08-31 04:18:39 | D | + w: sfp4_e2m1_all +25-08-31 04:18:39 | D | + x: sfp4_e2m1_all +25-08-31 04:18:39 | D | + y: None +25-08-31 04:18:39 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:18:39 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 04:18:42 | D | + x - AbsMax +25-08-31 04:18:42 | D | + x = [min=0.0000, max=56.7500] +25-08-31 04:18:42 | D | + w - AbsMax +25-08-31 04:18:42 | D | + w = [min=0.1045, max=0.9414] +25-08-31 04:18:42 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 04:18:44 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 04:20:35 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:20:35 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:20:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:20:35 | D | - sum error = [ 6288.5601, 6284.2260, 6291.3763, 6264.3886, 6273.3350] +25-08-31 04:20:35 | D | - best error = [ 6288.5601, 6284.2260, 6284.2260, 6264.3886, 6264.3886] +25-08-31 04:20:35 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:20:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:20:35 | D | - sum error = [ 6279.7032, 6287.3247, 6295.8504, 6313.8473, 6332.1769] +25-08-31 04:20:35 | D | - best error = [ 6264.3886, 6264.3886, 6264.3886, 6264.3886, 6264.3886] +25-08-31 04:20:35 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:20:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:20:35 | D | - sum error = [ 6339.4208, 6356.8936, 6382.9247, 6396.1382, 6421.0684] +25-08-31 04:20:35 | D | - best error = [ 6264.3886, 6264.3886, 6264.3886, 6264.3886, 6264.3886] +25-08-31 04:20:35 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:20:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:20:35 | D | - sum error = [ 6457.0362, 6489.7217, 6523.6131, 6558.8828, 6600.8805] +25-08-31 04:20:35 | D | - best error = [ 6264.3886, 6264.3886, 6264.3886, 6264.3886, 6264.3886] +25-08-31 04:20:35 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:20:35 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:20:35 | D | - sum error = [ 6769.6588, 6609.4305, 6571.0553, 6529.1954, 6490.7898] +25-08-31 04:20:35 | D | - best error = [ 6264.3886, 6264.3886, 6264.3886, 6264.3886, 6264.3886] +25-08-31 04:20:35 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:20:35 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:20:35 | D | - sum error = [ 6458.1508, 6429.7717, 6417.9523, 6411.2039, 6381.1717] +25-08-31 04:20:35 | D | - best error = [ 6264.3886, 6264.3886, 6264.3886, 6264.3886, 6264.3886] +25-08-31 04:20:35 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:20:35 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:20:35 | D | - sum error = [ 6390.5167, 6394.0160, 6405.7293, 6410.1426, 6438.8909] +25-08-31 04:20:35 | D | - best error = [ 6264.3886, 6264.3886, 6264.3886, 6264.3886, 6264.3886] +25-08-31 04:20:35 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:20:35 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:20:35 | D | - sum error = [ 6461.0296, 6492.3629, 6542.4583, 6585.3176] +25-08-31 04:20:35 | D | - best error = [ 6264.3886, 6264.3886, 6264.3886, 6264.3886] +25-08-31 04:20:35 | D | + error = 6264.3886 +25-08-31 04:20:35 | D | + scale = [min=0.0950, max=1.8327] +25-08-31 04:20:35 | D | - transformer_blocks.7.ff_context.up_proj +25-08-31 04:20:35 | D | + w: sfp4_e2m1_all +25-08-31 04:20:35 | D | + x: sfp4_e2m1_all +25-08-31 04:20:35 | D | + y: None +25-08-31 04:20:35 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:20:35 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 04:20:36 | D | + x - AbsMax +25-08-31 04:20:36 | D | + x = [min=0.1582, max=8.7500] +25-08-31 04:20:36 | D | + w - AbsMax +25-08-31 04:20:36 | D | + w = [min=0.0786, max=0.5391] +25-08-31 04:20:36 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 04:20:36 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 04:21:19 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:21:19 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:21:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:21:19 | D | - sum error = [ 3356.1394, 3282.8159, 3233.5453, 3190.8287, 3145.8819] +25-08-31 04:21:19 | D | - best error = [ 3356.1394, 3282.8159, 3233.5453, 3190.8287, 3145.8819] +25-08-31 04:21:19 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:21:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:21:19 | D | - sum error = [ 3131.2116, 3093.8995, 3074.1963, 3063.0517, 3052.8773] +25-08-31 04:21:19 | D | - best error = [ 3131.2116, 3093.8995, 3074.1963, 3063.0517, 3052.8773] +25-08-31 04:21:19 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:21:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:21:19 | D | - sum error = [ 3023.6329, 3031.4856, 3033.1845, 3028.5213, 3028.3950] +25-08-31 04:21:19 | D | - best error = [ 3023.6329, 3023.6329, 3023.6329, 3023.6329, 3023.6329] +25-08-31 04:21:19 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:21:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:21:19 | D | - sum error = [ 3033.7119, 3037.0443, 3055.1484, 3078.7111, 3083.7467] +25-08-31 04:21:19 | D | - best error = [ 3023.6329, 3023.6329, 3023.6329, 3023.6329, 3023.6329] +25-08-31 04:21:19 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:21:19 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:21:19 | D | - sum error = [ 3216.6825, 3156.4810, 3124.7102, 3091.8389, 3056.3017] +25-08-31 04:21:19 | D | - best error = [ 3023.6329, 3023.6329, 3023.6329, 3023.6329, 3023.6329] +25-08-31 04:21:19 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:21:19 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:21:19 | D | - sum error = [ 3050.4752, 3026.8604, 3020.1002, 3010.0008, 2990.6645] +25-08-31 04:21:19 | D | - best error = [ 3023.6329, 3023.6329, 3020.1002, 3010.0008, 2990.6645] +25-08-31 04:21:19 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:21:19 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:21:19 | D | - sum error = [ 2991.4861, 3008.4428, 2989.2079, 2997.7932, 3024.0043] +25-08-31 04:21:19 | D | - best error = [ 2990.6645, 2990.6645, 2989.2079, 2989.2079, 2989.2079] +25-08-31 04:21:19 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:21:19 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:21:19 | D | - sum error = [ 3016.7960, 3052.4947, 3073.9780, 3091.3122] +25-08-31 04:21:19 | D | - best error = [ 2989.2079, 2989.2079, 2989.2079, 2989.2079] +25-08-31 04:21:19 | D | + error = 2989.2079 +25-08-31 04:21:19 | D | + scale = [min=0.5243, max=8.3243] +25-08-31 04:21:19 | D | - transformer_blocks.7.ff_context.down_proj +25-08-31 04:21:19 | D | + w: sfp4_e2m1_all +25-08-31 04:21:19 | D | + x: sfp4_e2m1_all +25-08-31 04:21:19 | D | + y: None +25-08-31 04:21:19 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:21:19 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 04:21:20 | D | + x - AbsMax +25-08-31 04:21:20 | D | + x = [min=0.0000, max=45.5000] +25-08-31 04:21:20 | D | + w - AbsMax +25-08-31 04:21:20 | D | + w = [min=0.0211, max=0.5859] +25-08-31 04:21:20 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 04:21:20 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 04:22:08 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:22:08 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:22:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:22:08 | D | - sum error = [ 688.8984, 676.2230, 654.3862, 644.6206, 632.9756] +25-08-31 04:22:08 | D | - best error = [ 688.8984, 676.2230, 654.3862, 644.6206, 632.9756] +25-08-31 04:22:08 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:22:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:22:08 | D | - sum error = [ 627.6097, 621.9220, 619.2473, 614.5873, 613.9453] +25-08-31 04:22:08 | D | - best error = [ 627.6097, 621.9220, 619.2473, 614.5873, 613.9453] +25-08-31 04:22:08 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:22:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:22:08 | D | - sum error = [ 617.6561, 641.8077, 622.0051, 626.2257, 633.8586] +25-08-31 04:22:08 | D | - best error = [ 613.9453, 613.9453, 613.9453, 613.9453, 613.9453] +25-08-31 04:22:08 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:22:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:22:08 | D | - sum error = [ 641.7925, 655.4048, 671.8883, 692.2839, 725.4406] +25-08-31 04:22:08 | D | - best error = [ 613.9453, 613.9453, 613.9453, 613.9453, 613.9453] +25-08-31 04:22:08 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:22:08 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:22:08 | D | - sum error = [ 738.9704, 727.9357, 711.4265, 699.2580, 694.4879] +25-08-31 04:22:08 | D | - best error = [ 613.9453, 613.9453, 613.9453, 613.9453, 613.9453] +25-08-31 04:22:08 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:22:08 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:22:08 | D | - sum error = [ 680.8662, 670.8135, 670.1293, 663.3360, 664.1758] +25-08-31 04:22:08 | D | - best error = [ 613.9453, 613.9453, 613.9453, 613.9453, 613.9453] +25-08-31 04:22:08 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:22:08 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:22:08 | D | - sum error = [ 659.9379, 661.5390, 684.3656, 665.0070, 669.6243] +25-08-31 04:22:08 | D | - best error = [ 613.9453, 613.9453, 613.9453, 613.9453, 613.9453] +25-08-31 04:22:08 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:22:08 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:22:08 | D | - sum error = [ 677.7788, 687.6606, 702.9116, 719.8256] +25-08-31 04:22:08 | D | - best error = [ 613.9453, 613.9453, 613.9453, 613.9453] +25-08-31 04:22:08 | D | + error = 613.9453 +25-08-31 04:22:08 | D | + scale = [min=0.0009, max=5.5732] +25-08-31 04:22:27 | D | - Smoothing Diffusion Block transformer_blocks.8 +25-08-31 04:22:27 | D | - Skipping Module transformer_blocks.8.norm1.linear +25-08-31 04:22:27 | D | - Skipping Module transformer_blocks.8.norm1_context.linear +25-08-31 04:22:27 | D | - Smoothing Transformer Block transformer_blocks.8 +25-08-31 04:22:27 | D | - transformer_blocks.8.attn.qkv_proj +25-08-31 04:22:27 | D | + w: sfp4_e2m1_all +25-08-31 04:22:27 | D | + x: sfp4_e2m1_all +25-08-31 04:22:27 | D | + y: None +25-08-31 04:22:27 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:22:27 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 04:22:27 | D | + x - AbsMax +25-08-31 04:22:27 | D | + x = [min=0.0452, max=15.0625] +25-08-31 04:22:27 | D | + w - AbsMax +25-08-31 04:22:27 | D | + w = [min=0.1123, max=0.6406] +25-08-31 04:22:27 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 04:22:28 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 04:23:41 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:23:41 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:23:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:23:41 | D | - sum error = [ 3515.8322, 3464.2550, 3393.3122, 3364.1131, 3295.5379] +25-08-31 04:23:41 | D | - best error = [ 3515.8322, 3464.2550, 3393.3122, 3364.1131, 3295.5379] +25-08-31 04:23:41 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:23:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:23:41 | D | - sum error = [ 3281.2894, 3241.5246, 3234.6100, 3204.9527, 3191.2916] +25-08-31 04:23:41 | D | - best error = [ 3281.2894, 3241.5246, 3234.6100, 3204.9527, 3191.2916] +25-08-31 04:23:41 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:23:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:23:41 | D | - sum error = [ 3152.5559, 3180.5553, 3233.9549, 3194.3556, 3168.5612] +25-08-31 04:23:41 | D | - best error = [ 3152.5559, 3152.5559, 3152.5559, 3152.5559, 3152.5559] +25-08-31 04:23:41 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:23:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:23:41 | D | - sum error = [ 3245.1245, 3237.3149, 3223.6510, 3317.6623, 3331.6816] +25-08-31 04:23:41 | D | - best error = [ 3152.5559, 3152.5559, 3152.5559, 3152.5559, 3152.5559] +25-08-31 04:23:41 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:23:41 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:23:41 | D | - sum error = [ 6722.6489, 6354.7318, 5714.0070, 5530.1468, 5344.9846] +25-08-31 04:23:41 | D | - best error = [ 3152.5559, 3152.5559, 3152.5559, 3152.5559, 3152.5559] +25-08-31 04:23:41 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:23:41 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:23:41 | D | - sum error = [ 4766.6980, 4683.0973, 4501.8110, 4243.1512, 4027.6280] +25-08-31 04:23:41 | D | - best error = [ 3152.5559, 3152.5559, 3152.5559, 3152.5559, 3152.5559] +25-08-31 04:23:41 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:23:41 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:23:41 | D | - sum error = [ 3970.4524, 3698.1683, 3693.3942, 3534.5379, 3425.2343] +25-08-31 04:23:41 | D | - best error = [ 3152.5559, 3152.5559, 3152.5559, 3152.5559, 3152.5559] +25-08-31 04:23:41 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:23:41 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:23:41 | D | - sum error = [ 3367.0544, 3368.1390, 3408.7658, 3431.4662] +25-08-31 04:23:41 | D | - best error = [ 3152.5559, 3152.5559, 3152.5559, 3152.5559] +25-08-31 04:23:41 | D | + error = 3152.5559 +25-08-31 04:23:41 | D | + scale = [min=0.2125, max=3.8810] +25-08-31 04:23:41 | D | - transformer_blocks.8.attn add_qkv_proj +25-08-31 04:23:41 | D | + w: sfp4_e2m1_all +25-08-31 04:23:41 | D | + x: sfp4_e2m1_all +25-08-31 04:23:41 | D | + y: None +25-08-31 04:23:41 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:23:41 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 04:23:42 | D | + x - AbsMax +25-08-31 04:23:42 | D | + x = [min=0.1060, max=19.1250] +25-08-31 04:23:42 | D | + w - AbsMax +25-08-31 04:23:42 | D | + w = [min=0.1030, max=0.4082] +25-08-31 04:23:42 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 04:23:42 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 04:24:46 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:24:46 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:24:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:24:46 | D | - sum error = [ 977.7781, 989.7120, 950.7629, 949.6945, 914.4100] +25-08-31 04:24:46 | D | - best error = [ 977.7781, 977.7781, 950.7629, 949.6945, 914.4100] +25-08-31 04:24:46 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:24:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:24:46 | D | - sum error = [ 882.0693, 864.6818, 870.2766, 844.5304, 846.0264] +25-08-31 04:24:46 | D | - best error = [ 882.0693, 864.6818, 864.6818, 844.5304, 844.5304] +25-08-31 04:24:46 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:24:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:24:46 | D | - sum error = [ 859.8077, 867.9718, 869.6235, 878.9140, 863.9642] +25-08-31 04:24:46 | D | - best error = [ 844.5304, 844.5304, 844.5304, 844.5304, 844.5304] +25-08-31 04:24:46 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:24:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:24:46 | D | - sum error = [ 849.4022, 868.3498, 898.3920, 915.2588, 948.0002] +25-08-31 04:24:46 | D | - best error = [ 844.5304, 844.5304, 844.5304, 844.5304, 844.5304] +25-08-31 04:24:46 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:24:46 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:24:46 | D | - sum error = [ 1182.8627, 1108.3494, 1059.6509, 1010.2541, 999.1724] +25-08-31 04:24:46 | D | - best error = [ 844.5304, 844.5304, 844.5304, 844.5304, 844.5304] +25-08-31 04:24:46 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:24:46 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:24:46 | D | - sum error = [ 957.5020, 929.5917, 899.4267, 896.8309, 903.8031] +25-08-31 04:24:46 | D | - best error = [ 844.5304, 844.5304, 844.5304, 844.5304, 844.5304] +25-08-31 04:24:46 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:24:46 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:24:46 | D | - sum error = [ 879.3852, 850.0179, 863.8143, 861.8453, 877.8919] +25-08-31 04:24:46 | D | - best error = [ 844.5304, 844.5304, 844.5304, 844.5304, 844.5304] +25-08-31 04:24:46 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:24:46 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:24:46 | D | - sum error = [ 875.5955, 900.4020, 913.7610, 928.8838] +25-08-31 04:24:46 | D | - best error = [ 844.5304, 844.5304, 844.5304, 844.5304] +25-08-31 04:24:46 | D | + error = 844.5304 +25-08-31 04:24:46 | D | + scale = [min=0.4074, max=3.2557] +25-08-31 04:24:46 | D | - transformer_blocks.8.attn.out_proj + transformer_blocks.8.attn.add_out_proj +25-08-31 04:24:46 | D | + w: sfp4_e2m1_all +25-08-31 04:24:46 | D | + x: sfp4_e2m1_all +25-08-31 04:24:46 | D | + y: None +25-08-31 04:24:46 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:24:46 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 04:24:47 | D | + x - AbsMax +25-08-31 04:24:47 | D | + x = [min=1.2500, max=12.3125] +25-08-31 04:24:47 | D | + w - AbsMax +25-08-31 04:24:47 | D | + w = [min=0.1279, max=0.3164] +25-08-31 04:24:47 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 04:24:48 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 04:26:34 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:26:34 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:26:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:26:34 | D | - sum error = [ 5120.1949, 5132.0779, 5143.0158, 5131.6325, 5142.8523] +25-08-31 04:26:34 | D | - best error = [ 5120.1949, 5120.1949, 5120.1949, 5120.1949, 5120.1949] +25-08-31 04:26:34 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:26:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:26:34 | D | - sum error = [ 5146.6267, 5140.3255, 5147.8636, 5141.8456, 5143.6493] +25-08-31 04:26:34 | D | - best error = [ 5120.1949, 5120.1949, 5120.1949, 5120.1949, 5120.1949] +25-08-31 04:26:34 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:26:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:26:34 | D | - sum error = [ 5167.5983, 5159.9860, 5167.9642, 5171.6467, 5195.7471] +25-08-31 04:26:34 | D | - best error = [ 5120.1949, 5120.1949, 5120.1949, 5120.1949, 5120.1949] +25-08-31 04:26:34 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:26:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:26:34 | D | - sum error = [ 5198.0658, 5212.9278, 5236.7289, 5258.1993, 5270.1078] +25-08-31 04:26:34 | D | - best error = [ 5120.1949, 5120.1949, 5120.1949, 5120.1949, 5120.1949] +25-08-31 04:26:34 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:26:34 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:26:34 | D | - sum error = [ 5172.1848, 5163.7885, 5157.7468, 5150.3934, 5140.0813] +25-08-31 04:26:34 | D | - best error = [ 5120.1949, 5120.1949, 5120.1949, 5120.1949, 5120.1949] +25-08-31 04:26:34 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:26:34 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:26:34 | D | - sum error = [ 5158.7124, 5153.9866, 5143.9419, 5161.1956, 5155.4503] +25-08-31 04:26:34 | D | - best error = [ 5120.1949, 5120.1949, 5120.1949, 5120.1949, 5120.1949] +25-08-31 04:26:34 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:26:34 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:26:34 | D | - sum error = [ 5155.3892, 5169.1941, 5164.6256, 5186.7323, 5207.0314] +25-08-31 04:26:34 | D | - best error = [ 5120.1949, 5120.1949, 5120.1949, 5120.1949, 5120.1949] +25-08-31 04:26:34 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:26:34 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:26:34 | D | - sum error = [ 5213.0500, 5229.3675, 5252.4509, 5253.9928] +25-08-31 04:26:34 | D | - best error = [ 5120.1949, 5120.1949, 5120.1949, 5120.1949] +25-08-31 04:26:34 | D | + error = 5120.1949 +25-08-31 04:26:34 | D | + scale = [min=1.0000, max=1.0000] +25-08-31 04:26:34 | D | - transformer_blocks.8.ff.up_proj +25-08-31 04:26:34 | D | + w: sfp4_e2m1_all +25-08-31 04:26:34 | D | + x: sfp4_e2m1_all +25-08-31 04:26:34 | D | + y: None +25-08-31 04:26:34 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:26:34 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 04:26:35 | D | + x - AbsMax +25-08-31 04:26:35 | D | + x = [min=0.0752, max=14.6875] +25-08-31 04:26:35 | D | + w - AbsMax +25-08-31 04:26:35 | D | + w = [min=0.1113, max=0.5117] +25-08-31 04:26:35 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 04:26:36 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 04:27:52 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:27:52 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:27:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:27:52 | D | - sum error = [ 7518.9010, 7466.7249, 7433.5838, 7401.2621, 7363.6089] +25-08-31 04:27:52 | D | - best error = [ 7518.9010, 7466.7249, 7433.5838, 7401.2621, 7363.6089] +25-08-31 04:27:52 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:27:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:27:52 | D | - sum error = [ 7352.0269, 7317.7863, 7300.3173, 7289.0307, 7284.8539] +25-08-31 04:27:52 | D | - best error = [ 7352.0269, 7317.7863, 7300.3173, 7289.0307, 7284.8539] +25-08-31 04:27:52 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:27:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:27:52 | D | - sum error = [ 7283.9209, 7289.0386, 7285.6838, 7290.2368, 7299.0103] +25-08-31 04:27:52 | D | - best error = [ 7283.9209, 7283.9209, 7283.9209, 7283.9209, 7283.9209] +25-08-31 04:27:52 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:27:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:27:52 | D | - sum error = [ 7337.4729, 7353.7414, 7374.4177, 7421.0161, 7480.9286] +25-08-31 04:27:52 | D | - best error = [ 7283.9209, 7283.9209, 7283.9209, 7283.9209, 7283.9209] +25-08-31 04:27:52 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:27:52 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:27:52 | D | - sum error = [ 7941.6719, 7834.3880, 7722.3657, 7636.9478, 7562.4943] +25-08-31 04:27:52 | D | - best error = [ 7283.9209, 7283.9209, 7283.9209, 7283.9209, 7283.9209] +25-08-31 04:27:52 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:27:52 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:27:52 | D | - sum error = [ 7493.2017, 7447.6471, 7399.9924, 7382.9160, 7367.9590] +25-08-31 04:27:52 | D | - best error = [ 7283.9209, 7283.9209, 7283.9209, 7283.9209, 7283.9209] +25-08-31 04:27:52 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:27:52 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:27:52 | D | - sum error = [ 7338.9811, 7327.7301, 7347.2678, 7338.4140, 7340.7975] +25-08-31 04:27:52 | D | - best error = [ 7283.9209, 7283.9209, 7283.9209, 7283.9209, 7283.9209] +25-08-31 04:27:52 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:27:52 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:27:52 | D | - sum error = [ 7358.0641, 7386.0287, 7418.6638, 7466.0895] +25-08-31 04:27:52 | D | - best error = [ 7283.9209, 7283.9209, 7283.9209, 7283.9209] +25-08-31 04:27:52 | D | + error = 7283.9209 +25-08-31 04:27:52 | D | + scale = [min=0.2742, max=3.8324] +25-08-31 04:27:52 | D | - transformer_blocks.8.ff.down_proj +25-08-31 04:27:52 | D | + w: sfp4_e2m1_all +25-08-31 04:27:52 | D | + x: sfp4_e2m1_all +25-08-31 04:27:52 | D | + y: None +25-08-31 04:27:52 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:27:52 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 04:27:54 | D | + x - AbsMax +25-08-31 04:27:54 | D | + x = [min=0.0000, max=21.8750] +25-08-31 04:27:54 | D | + w - AbsMax +25-08-31 04:27:54 | D | + w = [min=0.0466, max=0.5547] +25-08-31 04:27:54 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 04:27:55 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 04:29:38 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:29:38 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:29:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:29:38 | D | - sum error = [ 6642.3863, 6630.5448, 6611.8633, 6616.3446, 6613.0825] +25-08-31 04:29:38 | D | - best error = [ 6642.3863, 6630.5448, 6611.8633, 6611.8633, 6611.8633] +25-08-31 04:29:38 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:29:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:29:38 | D | - sum error = [ 6607.9343, 6624.7666, 6628.0048, 6638.0398, 6652.2159] +25-08-31 04:29:38 | D | - best error = [ 6607.9343, 6607.9343, 6607.9343, 6607.9343, 6607.9343] +25-08-31 04:29:38 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:29:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:29:38 | D | - sum error = [ 6666.3951, 6678.4732, 6702.6983, 6719.0377, 6745.4370] +25-08-31 04:29:38 | D | - best error = [ 6607.9343, 6607.9343, 6607.9343, 6607.9343, 6607.9343] +25-08-31 04:29:38 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:29:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:29:38 | D | - sum error = [ 6778.3697, 6817.8073, 6851.2954, 6898.1922, 6954.5266] +25-08-31 04:29:38 | D | - best error = [ 6607.9343, 6607.9343, 6607.9343, 6607.9343, 6607.9343] +25-08-31 04:29:38 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:29:38 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:29:38 | D | - sum error = [ 6946.6470, 6909.5725, 6862.6590, 6823.3822, 6791.1634] +25-08-31 04:29:38 | D | - best error = [ 6607.9343, 6607.9343, 6607.9343, 6607.9343, 6607.9343] +25-08-31 04:29:38 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:29:38 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:29:38 | D | - sum error = [ 6757.6665, 6742.1982, 6727.8781, 6730.2383, 6704.1304] +25-08-31 04:29:38 | D | - best error = [ 6607.9343, 6607.9343, 6607.9343, 6607.9343, 6607.9343] +25-08-31 04:29:38 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:29:38 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:29:38 | D | - sum error = [ 6699.7651, 6707.3900, 6718.8460, 6728.3844, 6767.6330] +25-08-31 04:29:38 | D | - best error = [ 6607.9343, 6607.9343, 6607.9343, 6607.9343, 6607.9343] +25-08-31 04:29:38 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:29:38 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:29:38 | D | - sum error = [ 6790.3676, 6833.1188, 6874.5211, 6940.9118] +25-08-31 04:29:38 | D | - best error = [ 6607.9343, 6607.9343, 6607.9343, 6607.9343] +25-08-31 04:29:38 | D | + error = 6607.9343 +25-08-31 04:29:38 | D | + scale = [min=0.0197, max=2.1627] +25-08-31 04:29:39 | D | - transformer_blocks.8.ff_context.up_proj +25-08-31 04:29:39 | D | + w: sfp4_e2m1_all +25-08-31 04:29:39 | D | + x: sfp4_e2m1_all +25-08-31 04:29:39 | D | + y: None +25-08-31 04:29:39 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:29:39 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 04:29:39 | D | + x - AbsMax +25-08-31 04:29:39 | D | + x = [min=0.1758, max=15.0000] +25-08-31 04:29:39 | D | + w - AbsMax +25-08-31 04:29:39 | D | + w = [min=0.0820, max=0.4922] +25-08-31 04:29:39 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 04:29:39 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 04:30:22 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:30:22 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:30:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:30:22 | D | - sum error = [ 3815.1577, 3718.3151, 3651.7645, 3607.8940, 3577.3371] +25-08-31 04:30:22 | D | - best error = [ 3815.1577, 3718.3151, 3651.7645, 3607.8940, 3577.3371] +25-08-31 04:30:22 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:30:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:30:22 | D | - sum error = [ 3521.3729, 3492.5548, 3473.8742, 3453.7692, 3441.7873] +25-08-31 04:30:22 | D | - best error = [ 3521.3729, 3492.5548, 3473.8742, 3453.7692, 3441.7873] +25-08-31 04:30:22 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:30:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:30:22 | D | - sum error = [ 3438.2216, 3404.2566, 3422.7002, 3425.1885, 3420.9341] +25-08-31 04:30:22 | D | - best error = [ 3438.2216, 3404.2566, 3404.2566, 3404.2566, 3404.2566] +25-08-31 04:30:22 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:30:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:30:22 | D | - sum error = [ 3435.9159, 3437.8930, 3494.4759, 3484.4354, 3501.2366] +25-08-31 04:30:22 | D | - best error = [ 3404.2566, 3404.2566, 3404.2566, 3404.2566, 3404.2566] +25-08-31 04:30:22 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:30:22 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:30:22 | D | - sum error = [ 3618.2178, 3569.4892, 3529.5266, 3496.9940, 3467.3373] +25-08-31 04:30:22 | D | - best error = [ 3404.2566, 3404.2566, 3404.2566, 3404.2566, 3404.2566] +25-08-31 04:30:22 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:30:22 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:30:22 | D | - sum error = [ 3437.7530, 3422.4269, 3400.8356, 3392.8516, 3376.5434] +25-08-31 04:30:22 | D | - best error = [ 3404.2566, 3404.2566, 3400.8356, 3392.8516, 3376.5434] +25-08-31 04:30:22 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:30:22 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:30:22 | D | - sum error = [ 3374.3502, 3363.1567, 3383.9598, 3404.7976, 3406.4273] +25-08-31 04:30:22 | D | - best error = [ 3374.3502, 3363.1567, 3363.1567, 3363.1567, 3363.1567] +25-08-31 04:30:22 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:30:22 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:30:22 | D | - sum error = [ 3413.9174, 3457.2457, 3462.6594, 3502.6240] +25-08-31 04:30:22 | D | - best error = [ 3363.1567, 3363.1567, 3363.1567, 3363.1567] +25-08-31 04:30:22 | D | + error = 3363.1567 +25-08-31 04:30:22 | D | + scale = [min=0.7122, max=13.2267] +25-08-31 04:30:22 | D | - transformer_blocks.8.ff_context.down_proj +25-08-31 04:30:22 | D | + w: sfp4_e2m1_all +25-08-31 04:30:22 | D | + x: sfp4_e2m1_all +25-08-31 04:30:22 | D | + y: None +25-08-31 04:30:22 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:30:22 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 04:30:23 | D | + x - AbsMax +25-08-31 04:30:23 | D | + x = [min=0.0000, max=51.7500] +25-08-31 04:30:23 | D | + w - AbsMax +25-08-31 04:30:23 | D | + w = [min=0.0195, max=0.4414] +25-08-31 04:30:23 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 04:30:23 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 04:31:11 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:31:11 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:31:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:31:11 | D | - sum error = [ 628.3901, 607.6871, 589.5398, 577.1147, 570.7669] +25-08-31 04:31:11 | D | - best error = [ 628.3901, 607.6871, 589.5398, 577.1147, 570.7669] +25-08-31 04:31:11 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:31:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:31:11 | D | - sum error = [ 559.6546, 555.1085, 553.7827, 549.5734, 550.0396] +25-08-31 04:31:11 | D | - best error = [ 559.6546, 555.1085, 553.7827, 549.5734, 549.5734] +25-08-31 04:31:11 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:31:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:31:11 | D | - sum error = [ 549.4565, 552.7842, 556.9916, 562.4331, 571.9410] +25-08-31 04:31:11 | D | - best error = [ 549.4565, 549.4565, 549.4565, 549.4565, 549.4565] +25-08-31 04:31:11 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:31:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:31:11 | D | - sum error = [ 580.6693, 597.6300, 616.0429, 635.5231, 663.7903] +25-08-31 04:31:11 | D | - best error = [ 549.4565, 549.4565, 549.4565, 549.4565, 549.4565] +25-08-31 04:31:11 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:31:11 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:31:11 | D | - sum error = [ 690.6506, 675.1960, 655.9044, 644.8340, 636.4824] +25-08-31 04:31:11 | D | - best error = [ 549.4565, 549.4565, 549.4565, 549.4565, 549.4565] +25-08-31 04:31:11 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:31:11 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:31:11 | D | - sum error = [ 625.6644, 628.1313, 614.2861, 611.8432, 609.8633] +25-08-31 04:31:11 | D | - best error = [ 549.4565, 549.4565, 549.4565, 549.4565, 549.4565] +25-08-31 04:31:11 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:31:11 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:31:11 | D | - sum error = [ 608.5564, 608.8003, 610.8659, 615.4475, 621.2597] +25-08-31 04:31:11 | D | - best error = [ 549.4565, 549.4565, 549.4565, 549.4565, 549.4565] +25-08-31 04:31:11 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:31:11 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:31:11 | D | - sum error = [ 630.9370, 640.9364, 655.0810, 673.7230] +25-08-31 04:31:11 | D | - best error = [ 549.4565, 549.4565, 549.4565, 549.4565] +25-08-31 04:31:11 | D | + error = 549.4565 +25-08-31 04:31:11 | D | + scale = [min=0.0004, max=7.1937] +25-08-31 04:31:29 | D | - Smoothing Diffusion Block transformer_blocks.9 +25-08-31 04:31:29 | D | - Skipping Module transformer_blocks.9.norm1.linear +25-08-31 04:31:29 | D | - Skipping Module transformer_blocks.9.norm1_context.linear +25-08-31 04:31:29 | D | - Smoothing Transformer Block transformer_blocks.9 +25-08-31 04:31:29 | D | - transformer_blocks.9.attn.qkv_proj +25-08-31 04:31:29 | D | + w: sfp4_e2m1_all +25-08-31 04:31:29 | D | + x: sfp4_e2m1_all +25-08-31 04:31:29 | D | + y: None +25-08-31 04:31:29 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:31:29 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 04:31:30 | D | + x - AbsMax +25-08-31 04:31:30 | D | + x = [min=0.0894, max=16.3750] +25-08-31 04:31:30 | D | + w - AbsMax +25-08-31 04:31:30 | D | + w = [min=0.1099, max=0.4629] +25-08-31 04:31:30 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 04:31:31 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 04:32:44 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:32:44 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:32:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:32:44 | D | - sum error = [ 4146.1848, 4066.6720, 3994.6104, 3986.4217, 3943.1870] +25-08-31 04:32:44 | D | - best error = [ 4146.1848, 4066.6720, 3994.6104, 3986.4217, 3943.1870] +25-08-31 04:32:44 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:32:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:32:44 | D | - sum error = [ 3892.4334, 3864.1308, 3815.7104, 3785.2650, 3781.0417] +25-08-31 04:32:44 | D | - best error = [ 3892.4334, 3864.1308, 3815.7104, 3785.2650, 3781.0417] +25-08-31 04:32:44 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:32:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:32:44 | D | - sum error = [ 3762.5854, 3780.9037, 3761.1025, 3722.5763, 3774.7715] +25-08-31 04:32:44 | D | - best error = [ 3762.5854, 3762.5854, 3761.1025, 3722.5763, 3722.5763] +25-08-31 04:32:44 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:32:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:32:44 | D | - sum error = [ 3844.2841, 3829.1011, 3855.5046, 3918.1164, 3950.2362] +25-08-31 04:32:44 | D | - best error = [ 3722.5763, 3722.5763, 3722.5763, 3722.5763, 3722.5763] +25-08-31 04:32:44 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:32:44 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:32:44 | D | - sum error = [ 4769.8566, 4599.1069, 4489.0816, 4417.7476, 4341.1211] +25-08-31 04:32:44 | D | - best error = [ 3722.5763, 3722.5763, 3722.5763, 3722.5763, 3722.5763] +25-08-31 04:32:44 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:32:44 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:32:44 | D | - sum error = [ 4198.1770, 4098.5666, 4026.2022, 3961.5383, 3902.9867] +25-08-31 04:32:44 | D | - best error = [ 3722.5763, 3722.5763, 3722.5763, 3722.5763, 3722.5763] +25-08-31 04:32:44 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:32:44 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:32:44 | D | - sum error = [ 3886.0169, 3824.9335, 3848.4088, 3832.8431, 3810.9630] +25-08-31 04:32:44 | D | - best error = [ 3722.5763, 3722.5763, 3722.5763, 3722.5763, 3722.5763] +25-08-31 04:32:44 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:32:44 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:32:44 | D | - sum error = [ 3877.7245, 3840.8904, 3905.7145, 3952.3826] +25-08-31 04:32:44 | D | - best error = [ 3722.5763, 3722.5763, 3722.5763, 3722.5763] +25-08-31 04:32:44 | D | + error = 3722.5763 +25-08-31 04:32:44 | D | + scale = [min=0.2081, max=6.1549] +25-08-31 04:32:44 | D | - transformer_blocks.9.attn add_qkv_proj +25-08-31 04:32:44 | D | + w: sfp4_e2m1_all +25-08-31 04:32:44 | D | + x: sfp4_e2m1_all +25-08-31 04:32:44 | D | + y: None +25-08-31 04:32:44 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:32:44 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 04:32:45 | D | + x - AbsMax +25-08-31 04:32:45 | D | + x = [min=0.1533, max=20.2500] +25-08-31 04:32:45 | D | + w - AbsMax +25-08-31 04:32:45 | D | + w = [min=0.1177, max=0.4414] +25-08-31 04:32:45 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 04:32:45 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 04:33:49 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:33:49 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:33:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:33:49 | D | - sum error = [ 970.3025, 955.6857, 939.2625, 899.7108, 880.4776] +25-08-31 04:33:49 | D | - best error = [ 970.3025, 955.6857, 939.2625, 899.7108, 880.4776] +25-08-31 04:33:49 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:33:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:33:49 | D | - sum error = [ 877.2424, 878.9051, 856.0143, 838.5569, 823.4276] +25-08-31 04:33:49 | D | - best error = [ 877.2424, 877.2424, 856.0143, 838.5569, 823.4276] +25-08-31 04:33:49 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:33:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:33:49 | D | - sum error = [ 811.3990, 846.8273, 835.3586, 846.5926, 847.5681] +25-08-31 04:33:49 | D | - best error = [ 811.3990, 811.3990, 811.3990, 811.3990, 811.3990] +25-08-31 04:33:49 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:33:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:33:49 | D | - sum error = [ 848.2380, 856.5180, 848.0968, 850.5097, 891.8372] +25-08-31 04:33:49 | D | - best error = [ 811.3990, 811.3990, 811.3990, 811.3990, 811.3990] +25-08-31 04:33:49 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:33:49 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:33:49 | D | - sum error = [ 1113.8348, 1069.1608, 1015.7753, 988.9180, 963.9603] +25-08-31 04:33:49 | D | - best error = [ 811.3990, 811.3990, 811.3990, 811.3990, 811.3990] +25-08-31 04:33:49 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:33:49 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:33:49 | D | - sum error = [ 954.9945, 914.4943, 892.7531, 873.1338, 848.8725] +25-08-31 04:33:49 | D | - best error = [ 811.3990, 811.3990, 811.3990, 811.3990, 811.3990] +25-08-31 04:33:49 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:33:49 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:33:49 | D | - sum error = [ 860.7809, 838.5192, 825.1588, 841.7837, 845.8667] +25-08-31 04:33:49 | D | - best error = [ 811.3990, 811.3990, 811.3990, 811.3990, 811.3990] +25-08-31 04:33:49 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:33:49 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:33:49 | D | - sum error = [ 847.2754, 878.5667, 893.0521, 890.7392] +25-08-31 04:33:49 | D | - best error = [ 811.3990, 811.3990, 811.3990, 811.3990] +25-08-31 04:33:49 | D | + error = 811.3990 +25-08-31 04:33:49 | D | + scale = [min=0.3916, max=4.5000] +25-08-31 04:33:49 | D | - transformer_blocks.9.attn.out_proj + transformer_blocks.9.attn.add_out_proj +25-08-31 04:33:49 | D | + w: sfp4_e2m1_all +25-08-31 04:33:49 | D | + x: sfp4_e2m1_all +25-08-31 04:33:49 | D | + y: None +25-08-31 04:33:49 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:33:49 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 04:33:50 | D | + x - AbsMax +25-08-31 04:33:50 | D | + x = [min=2.4531, max=15.8750] +25-08-31 04:33:50 | D | + w - AbsMax +25-08-31 04:33:50 | D | + w = [min=0.1260, max=0.3164] +25-08-31 04:33:50 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 04:33:51 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 04:35:37 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:35:37 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:35:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:35:37 | D | - sum error = [ 7181.3890, 7174.0611, 7156.7898, 7162.0262, 7150.1334] +25-08-31 04:35:37 | D | - best error = [ 7181.3890, 7174.0611, 7156.7898, 7156.7898, 7150.1334] +25-08-31 04:35:37 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:35:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:35:37 | D | - sum error = [ 7156.0528, 7162.5244, 7174.7619, 7163.4405, 7156.3661] +25-08-31 04:35:37 | D | - best error = [ 7150.1334, 7150.1334, 7150.1334, 7150.1334, 7150.1334] +25-08-31 04:35:37 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:35:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:35:37 | D | - sum error = [ 7164.6530, 7170.9587, 7177.9189, 7194.2697, 7203.0879] +25-08-31 04:35:37 | D | - best error = [ 7150.1334, 7150.1334, 7150.1334, 7150.1334, 7150.1334] +25-08-31 04:35:37 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:35:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:35:37 | D | - sum error = [ 7228.6181, 7237.7983, 7250.5324, 7275.9698, 7288.0011] +25-08-31 04:35:37 | D | - best error = [ 7150.1334, 7150.1334, 7150.1334, 7150.1334, 7150.1334] +25-08-31 04:35:37 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:35:37 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:35:37 | D | - sum error = [ 7268.6532, 7238.7540, 7232.5818, 7234.7915, 7205.0499] +25-08-31 04:35:37 | D | - best error = [ 7150.1334, 7150.1334, 7150.1334, 7150.1334, 7150.1334] +25-08-31 04:35:37 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:35:37 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:35:37 | D | - sum error = [ 7205.8576, 7200.4772, 7206.4410, 7204.4883, 7212.2390] +25-08-31 04:35:37 | D | - best error = [ 7150.1334, 7150.1334, 7150.1334, 7150.1334, 7150.1334] +25-08-31 04:35:37 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:35:37 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:35:37 | D | - sum error = [ 7202.6739, 7204.2293, 7221.5671, 7210.4890, 7232.2763] +25-08-31 04:35:37 | D | - best error = [ 7150.1334, 7150.1334, 7150.1334, 7150.1334, 7150.1334] +25-08-31 04:35:37 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:35:37 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:35:37 | D | - sum error = [ 7221.4962, 7256.4162, 7275.9956, 7283.1749] +25-08-31 04:35:37 | D | - best error = [ 7150.1334, 7150.1334, 7150.1334, 7150.1334] +25-08-31 04:35:37 | D | + error = 7150.1334 +25-08-31 04:35:37 | D | + scale = [min=1.1966, max=1.7384] +25-08-31 04:35:37 | D | - transformer_blocks.9.ff.up_proj +25-08-31 04:35:37 | D | + w: sfp4_e2m1_all +25-08-31 04:35:37 | D | + x: sfp4_e2m1_all +25-08-31 04:35:37 | D | + y: None +25-08-31 04:35:37 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:35:37 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 04:35:38 | D | + x - AbsMax +25-08-31 04:35:38 | D | + x = [min=0.0728, max=11.7500] +25-08-31 04:35:38 | D | + w - AbsMax +25-08-31 04:35:38 | D | + w = [min=0.1191, max=0.5859] +25-08-31 04:35:38 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 04:35:39 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 04:36:56 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:36:56 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:36:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:36:56 | D | - sum error = [ 8128.1570, 8057.4698, 7997.1142, 7954.1360, 7914.4374] +25-08-31 04:36:56 | D | - best error = [ 8128.1570, 8057.4698, 7997.1142, 7954.1360, 7914.4374] +25-08-31 04:36:56 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:36:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:36:56 | D | - sum error = [ 7885.4479, 7845.7563, 7837.4613, 7823.7293, 7803.2245] +25-08-31 04:36:56 | D | - best error = [ 7885.4479, 7845.7563, 7837.4613, 7823.7293, 7803.2245] +25-08-31 04:36:56 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:36:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:36:56 | D | - sum error = [ 7811.9693, 7816.2694, 7810.8335, 7843.3455, 7858.9910] +25-08-31 04:36:56 | D | - best error = [ 7803.2245, 7803.2245, 7803.2245, 7803.2245, 7803.2245] +25-08-31 04:36:56 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:36:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:36:56 | D | - sum error = [ 7867.0510, 7908.8148, 7930.5740, 8005.6892, 8049.6286] +25-08-31 04:36:56 | D | - best error = [ 7803.2245, 7803.2245, 7803.2245, 7803.2245, 7803.2245] +25-08-31 04:36:56 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:36:56 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:36:56 | D | - sum error = [ 8615.5213, 8472.5925, 8336.8810, 8231.8654, 8129.2789] +25-08-31 04:36:56 | D | - best error = [ 7803.2245, 7803.2245, 7803.2245, 7803.2245, 7803.2245] +25-08-31 04:36:56 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:36:56 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:36:56 | D | - sum error = [ 8060.8538, 8005.8006, 7957.0052, 7910.2895, 7892.5337] +25-08-31 04:36:56 | D | - best error = [ 7803.2245, 7803.2245, 7803.2245, 7803.2245, 7803.2245] +25-08-31 04:36:56 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:36:56 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:36:56 | D | - sum error = [ 7855.7592, 7855.9420, 7854.9500, 7836.9361, 7874.5890] +25-08-31 04:36:56 | D | - best error = [ 7803.2245, 7803.2245, 7803.2245, 7803.2245, 7803.2245] +25-08-31 04:36:56 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:36:56 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:36:56 | D | - sum error = [ 7911.8898, 7937.2222, 7990.1755, 8049.1865] +25-08-31 04:36:56 | D | - best error = [ 7803.2245, 7803.2245, 7803.2245, 7803.2245] +25-08-31 04:36:56 | D | + error = 7803.2245 +25-08-31 04:36:56 | D | + scale = [min=0.3075, max=3.0305] +25-08-31 04:36:56 | D | - transformer_blocks.9.ff.down_proj +25-08-31 04:36:56 | D | + w: sfp4_e2m1_all +25-08-31 04:36:56 | D | + x: sfp4_e2m1_all +25-08-31 04:36:56 | D | + y: None +25-08-31 04:36:56 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:36:56 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 04:36:59 | D | + x - AbsMax +25-08-31 04:36:59 | D | + x = [min=0.0000, max=14.9375] +25-08-31 04:36:59 | D | + w - AbsMax +25-08-31 04:36:59 | D | + w = [min=0.0669, max=0.6406] +25-08-31 04:36:59 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 04:37:01 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 04:38:52 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:38:52 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:38:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:38:52 | D | - sum error = [ 6932.9982, 6926.0090, 6919.2170, 6903.8407, 6890.9474] +25-08-31 04:38:52 | D | - best error = [ 6932.9982, 6926.0090, 6919.2170, 6903.8407, 6890.9474] +25-08-31 04:38:52 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:38:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:38:52 | D | - sum error = [ 6892.6874, 6900.4365, 6919.6410, 6914.7651, 6918.9346] +25-08-31 04:38:52 | D | - best error = [ 6890.9474, 6890.9474, 6890.9474, 6890.9474, 6890.9474] +25-08-31 04:38:52 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:38:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:38:52 | D | - sum error = [ 6938.0271, 6931.8194, 6966.5093, 6993.8710, 7013.1565] +25-08-31 04:38:52 | D | - best error = [ 6890.9474, 6890.9474, 6890.9474, 6890.9474, 6890.9474] +25-08-31 04:38:52 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:38:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:38:52 | D | - sum error = [ 7040.0304, 7104.3074, 7115.7206, 7161.3289, 7205.7022] +25-08-31 04:38:52 | D | - best error = [ 6890.9474, 6890.9474, 6890.9474, 6890.9474, 6890.9474] +25-08-31 04:38:52 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:38:52 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:38:52 | D | - sum error = [ 7296.4696, 7220.3100, 7171.0501, 7130.0827, 7092.3065] +25-08-31 04:38:52 | D | - best error = [ 6890.9474, 6890.9474, 6890.9474, 6890.9474, 6890.9474] +25-08-31 04:38:52 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:38:52 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:38:52 | D | - sum error = [ 7041.8594, 7021.1452, 6987.6817, 6982.2610, 6960.5704] +25-08-31 04:38:52 | D | - best error = [ 6890.9474, 6890.9474, 6890.9474, 6890.9474, 6890.9474] +25-08-31 04:38:52 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:38:52 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:38:52 | D | - sum error = [ 6950.0918, 6952.2883, 6954.3210, 6979.8748, 7008.8595] +25-08-31 04:38:52 | D | - best error = [ 6890.9474, 6890.9474, 6890.9474, 6890.9474, 6890.9474] +25-08-31 04:38:52 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:38:52 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:38:52 | D | - sum error = [ 7041.6120, 7079.6271, 7123.3259, 7175.8135] +25-08-31 04:38:52 | D | - best error = [ 6890.9474, 6890.9474, 6890.9474, 6890.9474] +25-08-31 04:38:52 | D | + error = 6890.9474 +25-08-31 04:38:52 | D | + scale = [min=0.0434, max=1.7173] +25-08-31 04:38:52 | D | - transformer_blocks.9.ff_context.up_proj +25-08-31 04:38:52 | D | + w: sfp4_e2m1_all +25-08-31 04:38:52 | D | + x: sfp4_e2m1_all +25-08-31 04:38:52 | D | + y: None +25-08-31 04:38:52 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:38:52 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 04:38:52 | D | + x - AbsMax +25-08-31 04:38:52 | D | + x = [min=0.1846, max=16.6250] +25-08-31 04:38:52 | D | + w - AbsMax +25-08-31 04:38:52 | D | + w = [min=0.0796, max=0.5117] +25-08-31 04:38:52 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 04:38:53 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 04:39:36 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:39:36 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:39:36 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:39:36 | D | - sum error = [ 3916.7479, 3854.5838, 3773.8276, 3701.2163, 3667.4422] +25-08-31 04:39:36 | D | - best error = [ 3916.7479, 3854.5838, 3773.8276, 3701.2163, 3667.4422] +25-08-31 04:39:36 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:39:36 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:39:36 | D | - sum error = [ 3608.4540, 3574.1154, 3512.0503, 3514.2359, 3500.8393] +25-08-31 04:39:36 | D | - best error = [ 3608.4540, 3574.1154, 3512.0503, 3512.0503, 3500.8393] +25-08-31 04:39:36 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:39:36 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:39:36 | D | - sum error = [ 3474.3014, 3451.4733, 3448.2619, 3458.6863, 3458.9842] +25-08-31 04:39:36 | D | - best error = [ 3474.3014, 3451.4733, 3448.2619, 3448.2619, 3448.2619] +25-08-31 04:39:36 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:39:36 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:39:36 | D | - sum error = [ 3464.3600, 3453.8274, 3474.6108, 3483.8606, 3511.5035] +25-08-31 04:39:36 | D | - best error = [ 3448.2619, 3448.2619, 3448.2619, 3448.2619, 3448.2619] +25-08-31 04:39:36 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:39:36 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:39:36 | D | - sum error = [ 3719.9449, 3671.3414, 3620.2961, 3598.8638, 3551.4321] +25-08-31 04:39:36 | D | - best error = [ 3448.2619, 3448.2619, 3448.2619, 3448.2619, 3448.2619] +25-08-31 04:39:36 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:39:36 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:39:36 | D | - sum error = [ 3501.6191, 3478.8351, 3453.5091, 3433.8110, 3440.3378] +25-08-31 04:39:36 | D | - best error = [ 3448.2619, 3448.2619, 3448.2619, 3433.8110, 3433.8110] +25-08-31 04:39:36 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:39:36 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:39:36 | D | - sum error = [ 3426.6412, 3405.7340, 3417.0799, 3397.5487, 3414.8495] +25-08-31 04:39:36 | D | - best error = [ 3426.6412, 3405.7340, 3405.7340, 3397.5487, 3397.5487] +25-08-31 04:39:36 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:39:36 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:39:36 | D | - sum error = [ 3426.3155, 3433.1858, 3471.4231, 3492.1661] +25-08-31 04:39:36 | D | - best error = [ 3397.5487, 3397.5487, 3397.5487, 3397.5487] +25-08-31 04:39:36 | D | + error = 3397.5487 +25-08-31 04:39:36 | D | + scale = [min=0.5112, max=13.4126] +25-08-31 04:39:36 | D | - transformer_blocks.9.ff_context.down_proj +25-08-31 04:39:36 | D | + w: sfp4_e2m1_all +25-08-31 04:39:36 | D | + x: sfp4_e2m1_all +25-08-31 04:39:36 | D | + y: None +25-08-31 04:39:36 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:39:36 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 04:39:36 | D | + x - AbsMax +25-08-31 04:39:36 | D | + x = [min=0.0000, max=39.0000] +25-08-31 04:39:36 | D | + w - AbsMax +25-08-31 04:39:36 | D | + w = [min=0.0228, max=0.5508] +25-08-31 04:39:36 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 04:39:37 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 04:40:25 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:40:25 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:40:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:40:25 | D | - sum error = [ 803.9567, 786.8217, 771.7881, 758.6542, 747.9504] +25-08-31 04:40:25 | D | - best error = [ 803.9567, 786.8217, 771.7881, 758.6542, 747.9504] +25-08-31 04:40:25 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:40:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:40:25 | D | - sum error = [ 742.2682, 735.6785, 731.5829, 728.3057, 731.7248] +25-08-31 04:40:25 | D | - best error = [ 742.2682, 735.6785, 731.5829, 728.3057, 728.3057] +25-08-31 04:40:25 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:40:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:40:25 | D | - sum error = [ 729.9449, 736.1336, 736.7080, 741.3105, 753.7470] +25-08-31 04:40:25 | D | - best error = [ 728.3057, 728.3057, 728.3057, 728.3057, 728.3057] +25-08-31 04:40:25 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:40:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:40:25 | D | - sum error = [ 757.8299, 772.9002, 790.3537, 812.6628, 837.3892] +25-08-31 04:40:25 | D | - best error = [ 728.3057, 728.3057, 728.3057, 728.3057, 728.3057] +25-08-31 04:40:25 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:40:25 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:40:25 | D | - sum error = [ 869.7378, 850.0587, 833.0906, 820.4461, 809.4531] +25-08-31 04:40:25 | D | - best error = [ 728.3057, 728.3057, 728.3057, 728.3057, 728.3057] +25-08-31 04:40:25 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:40:25 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:40:25 | D | - sum error = [ 802.3258, 797.1757, 787.4737, 786.7655, 783.2931] +25-08-31 04:40:25 | D | - best error = [ 728.3057, 728.3057, 728.3057, 728.3057, 728.3057] +25-08-31 04:40:25 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:40:25 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:40:25 | D | - sum error = [ 781.7784, 785.1694, 785.5151, 791.0051, 795.8498] +25-08-31 04:40:25 | D | - best error = [ 728.3057, 728.3057, 728.3057, 728.3057, 728.3057] +25-08-31 04:40:25 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:40:25 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:40:25 | D | - sum error = [ 802.0244, 812.7109, 829.9941, 845.5577] +25-08-31 04:40:25 | D | - best error = [ 728.3057, 728.3057, 728.3057, 728.3057] +25-08-31 04:40:25 | D | + error = 728.3057 +25-08-31 04:40:25 | D | + scale = [min=0.0019, max=4.3294] +25-08-31 04:40:43 | D | - Smoothing Diffusion Block transformer_blocks.10 +25-08-31 04:40:43 | D | - Skipping Module transformer_blocks.10.norm1.linear +25-08-31 04:40:43 | D | - Skipping Module transformer_blocks.10.norm1_context.linear +25-08-31 04:40:43 | D | - Smoothing Transformer Block transformer_blocks.10 +25-08-31 04:40:43 | D | - transformer_blocks.10.attn.qkv_proj +25-08-31 04:40:43 | D | + w: sfp4_e2m1_all +25-08-31 04:40:43 | D | + x: sfp4_e2m1_all +25-08-31 04:40:43 | D | + y: None +25-08-31 04:40:43 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:40:43 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 04:40:44 | D | + x - AbsMax +25-08-31 04:40:44 | D | + x = [min=0.0874, max=14.8125] +25-08-31 04:40:44 | D | + w - AbsMax +25-08-31 04:40:44 | D | + w = [min=0.1113, max=0.4766] +25-08-31 04:40:44 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 04:40:45 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 04:42:04 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:42:04 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:42:04 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:42:04 | D | - sum error = [ 4374.2972, 4287.3148, 4171.2705, 4111.1984, 4041.7038] +25-08-31 04:42:04 | D | - best error = [ 4374.2972, 4287.3148, 4171.2705, 4111.1984, 4041.7038] +25-08-31 04:42:04 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:42:04 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:42:04 | D | - sum error = [ 3957.9841, 3939.7680, 3884.3369, 3899.1499, 3892.4952] +25-08-31 04:42:04 | D | - best error = [ 3957.9841, 3939.7680, 3884.3369, 3884.3369, 3884.3369] +25-08-31 04:42:04 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:42:04 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:42:04 | D | - sum error = [ 3863.6522, 3852.9652, 3891.4415, 3912.2001, 3939.6883] +25-08-31 04:42:04 | D | - best error = [ 3863.6522, 3852.9652, 3852.9652, 3852.9652, 3852.9652] +25-08-31 04:42:04 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:42:04 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:42:04 | D | - sum error = [ 4043.7488, 4057.6901, 4123.9551, 4222.8535, 4312.7703] +25-08-31 04:42:04 | D | - best error = [ 3852.9652, 3852.9652, 3852.9652, 3852.9652, 3852.9652] +25-08-31 04:42:04 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:42:04 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:42:04 | D | - sum error = [ 5538.8602, 5315.6313, 5193.5745, 4904.4415, 4810.6099] +25-08-31 04:42:04 | D | - best error = [ 3852.9652, 3852.9652, 3852.9652, 3852.9652, 3852.9652] +25-08-31 04:42:04 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:42:04 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:42:04 | D | - sum error = [ 4626.5266, 4516.0959, 4398.3758, 4329.9164, 4193.8858] +25-08-31 04:42:04 | D | - best error = [ 3852.9652, 3852.9652, 3852.9652, 3852.9652, 3852.9652] +25-08-31 04:42:04 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:42:04 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:42:04 | D | - sum error = [ 4155.6545, 4060.6977, 4114.6361, 4108.2067, 4136.0322] +25-08-31 04:42:04 | D | - best error = [ 3852.9652, 3852.9652, 3852.9652, 3852.9652, 3852.9652] +25-08-31 04:42:04 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:42:04 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:42:04 | D | - sum error = [ 4148.9184, 4229.2782, 4220.0936, 4334.1444] +25-08-31 04:42:04 | D | - best error = [ 3852.9652, 3852.9652, 3852.9652, 3852.9652] +25-08-31 04:42:04 | D | + error = 3852.9652 +25-08-31 04:42:04 | D | + scale = [min=0.2617, max=4.4040] +25-08-31 04:42:04 | D | - transformer_blocks.10.attn add_qkv_proj +25-08-31 04:42:04 | D | + w: sfp4_e2m1_all +25-08-31 04:42:04 | D | + x: sfp4_e2m1_all +25-08-31 04:42:04 | D | + y: None +25-08-31 04:42:04 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:42:04 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 04:42:04 | D | + x - AbsMax +25-08-31 04:42:04 | D | + x = [min=0.2070, max=15.0000] +25-08-31 04:42:04 | D | + w - AbsMax +25-08-31 04:42:04 | D | + w = [min=0.1191, max=0.3770] +25-08-31 04:42:04 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 04:42:05 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 04:43:08 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:43:08 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:43:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:43:08 | D | - sum error = [ 1263.7836, 1202.8238, 1188.5479, 1161.6690, 1172.0514] +25-08-31 04:43:08 | D | - best error = [ 1263.7836, 1202.8238, 1188.5479, 1161.6690, 1161.6690] +25-08-31 04:43:08 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:43:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:43:08 | D | - sum error = [ 1148.5980, 1127.3504, 1109.6296, 1124.5722, 1100.6063] +25-08-31 04:43:08 | D | - best error = [ 1148.5980, 1127.3504, 1109.6296, 1109.6296, 1100.6063] +25-08-31 04:43:08 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:43:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:43:08 | D | - sum error = [ 1094.4129, 1088.4974, 1087.0642, 1089.3606, 1097.2099] +25-08-31 04:43:08 | D | - best error = [ 1094.4129, 1088.4974, 1087.0642, 1087.0642, 1087.0642] +25-08-31 04:43:08 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:43:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:43:08 | D | - sum error = [ 1101.7749, 1070.4143, 1111.9214, 1099.7393, 1114.5176] +25-08-31 04:43:08 | D | - best error = [ 1087.0642, 1070.4143, 1070.4143, 1070.4143, 1070.4143] +25-08-31 04:43:08 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:43:08 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:43:08 | D | - sum error = [ 1343.1707, 1341.1204, 1261.2960, 1234.8258, 1201.9664] +25-08-31 04:43:08 | D | - best error = [ 1070.4143, 1070.4143, 1070.4143, 1070.4143, 1070.4143] +25-08-31 04:43:08 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:43:08 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:43:08 | D | - sum error = [ 1200.3249, 1195.1715, 1158.6422, 1121.6331, 1111.2263] +25-08-31 04:43:08 | D | - best error = [ 1070.4143, 1070.4143, 1070.4143, 1070.4143, 1070.4143] +25-08-31 04:43:08 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:43:08 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:43:08 | D | - sum error = [ 1089.4754, 1068.4734, 1068.0317, 1093.4727, 1080.3061] +25-08-31 04:43:08 | D | - best error = [ 1070.4143, 1068.4734, 1068.0317, 1068.0317, 1068.0317] +25-08-31 04:43:08 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:43:08 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:43:08 | D | - sum error = [ 1098.9887, 1084.3150, 1119.4115, 1131.3146] +25-08-31 04:43:08 | D | - best error = [ 1068.0317, 1068.0317, 1068.0317, 1068.0317] +25-08-31 04:43:08 | D | + error = 1068.0317 +25-08-31 04:43:08 | D | + scale = [min=0.6926, max=9.6543] +25-08-31 04:43:08 | D | - transformer_blocks.10.attn.out_proj + transformer_blocks.10.attn.add_out_proj +25-08-31 04:43:08 | D | + w: sfp4_e2m1_all +25-08-31 04:43:08 | D | + x: sfp4_e2m1_all +25-08-31 04:43:08 | D | + y: None +25-08-31 04:43:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:43:08 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 04:43:08 | D | + x - AbsMax +25-08-31 04:43:08 | D | + x = [min=2.5312, max=18.2500] +25-08-31 04:43:08 | D | + w - AbsMax +25-08-31 04:43:08 | D | + w = [min=0.1235, max=0.2637] +25-08-31 04:43:08 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 04:43:09 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 04:44:54 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:44:54 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:44:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:44:54 | D | - sum error = [ 8092.6159, 8095.0511, 8084.5203, 8075.7023, 8072.4712] +25-08-31 04:44:54 | D | - best error = [ 8092.6159, 8092.6159, 8084.5203, 8075.7023, 8072.4712] +25-08-31 04:44:54 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:44:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:44:54 | D | - sum error = [ 8061.1002, 8083.9458, 8069.0354, 8075.6776, 8084.4042] +25-08-31 04:44:54 | D | - best error = [ 8061.1002, 8061.1002, 8061.1002, 8061.1002, 8061.1002] +25-08-31 04:44:54 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:44:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:44:54 | D | - sum error = [ 8108.4551, 8126.6035, 8138.7119, 8150.0618, 8181.7294] +25-08-31 04:44:54 | D | - best error = [ 8061.1002, 8061.1002, 8061.1002, 8061.1002, 8061.1002] +25-08-31 04:44:54 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:44:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:44:54 | D | - sum error = [ 8201.8427, 8218.4972, 8242.5025, 8296.6957, 8299.4458] +25-08-31 04:44:54 | D | - best error = [ 8061.1002, 8061.1002, 8061.1002, 8061.1002, 8061.1002] +25-08-31 04:44:54 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:44:54 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:44:54 | D | - sum error = [ 8177.1411, 8150.9649, 8136.7450, 8128.2557, 8126.6030] +25-08-31 04:44:54 | D | - best error = [ 8061.1002, 8061.1002, 8061.1002, 8061.1002, 8061.1002] +25-08-31 04:44:54 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:44:54 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:44:54 | D | - sum error = [ 8111.4988, 8122.2260, 8110.0446, 8118.3545, 8125.8742] +25-08-31 04:44:54 | D | - best error = [ 8061.1002, 8061.1002, 8061.1002, 8061.1002, 8061.1002] +25-08-31 04:44:54 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:44:54 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:44:54 | D | - sum error = [ 8135.9228, 8129.4898, 8154.5855, 8186.2705, 8195.9829] +25-08-31 04:44:54 | D | - best error = [ 8061.1002, 8061.1002, 8061.1002, 8061.1002, 8061.1002] +25-08-31 04:44:54 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:44:54 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:44:54 | D | - sum error = [ 8235.7110, 8246.2690, 8278.1607, 8307.6745] +25-08-31 04:44:54 | D | - best error = [ 8061.1002, 8061.1002, 8061.1002, 8061.1002] +25-08-31 04:44:54 | D | + error = 8061.1002 +25-08-31 04:44:54 | D | + scale = [min=1.2613, max=2.0669] +25-08-31 04:44:54 | D | - transformer_blocks.10.ff.up_proj +25-08-31 04:44:54 | D | + w: sfp4_e2m1_all +25-08-31 04:44:54 | D | + x: sfp4_e2m1_all +25-08-31 04:44:54 | D | + y: None +25-08-31 04:44:54 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:44:54 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 04:44:55 | D | + x - AbsMax +25-08-31 04:44:55 | D | + x = [min=0.1318, max=11.7500] +25-08-31 04:44:55 | D | + w - AbsMax +25-08-31 04:44:55 | D | + w = [min=0.1162, max=0.4863] +25-08-31 04:44:55 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 04:44:56 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 04:46:12 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:46:12 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:46:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:46:12 | D | - sum error = [ 8715.9748, 8616.6304, 8501.1002, 8408.8677, 8329.2285] +25-08-31 04:46:12 | D | - best error = [ 8715.9748, 8616.6304, 8501.1002, 8408.8677, 8329.2285] +25-08-31 04:46:12 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:46:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:46:12 | D | - sum error = [ 8256.9079, 8210.0100, 8168.1934, 8133.4549, 8099.2716] +25-08-31 04:46:12 | D | - best error = [ 8256.9079, 8210.0100, 8168.1934, 8133.4549, 8099.2716] +25-08-31 04:46:12 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:46:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:46:12 | D | - sum error = [ 8101.6808, 8070.9828, 8087.5930, 8085.1580, 8118.1389] +25-08-31 04:46:12 | D | - best error = [ 8099.2716, 8070.9828, 8070.9828, 8070.9828, 8070.9828] +25-08-31 04:46:12 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:46:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:46:12 | D | - sum error = [ 8146.5303, 8198.8472, 8249.2211, 8317.8233, 8394.9456] +25-08-31 04:46:12 | D | - best error = [ 8070.9828, 8070.9828, 8070.9828, 8070.9828, 8070.9828] +25-08-31 04:46:12 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:46:12 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:46:12 | D | - sum error = [ 9065.5586, 8889.8459, 8732.5185, 8581.7597, 8483.7473] +25-08-31 04:46:12 | D | - best error = [ 8070.9828, 8070.9828, 8070.9828, 8070.9828, 8070.9828] +25-08-31 04:46:12 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:46:12 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:46:12 | D | - sum error = [ 8396.0600, 8301.0399, 8238.4299, 8183.5855, 8156.5409] +25-08-31 04:46:12 | D | - best error = [ 8070.9828, 8070.9828, 8070.9828, 8070.9828, 8070.9828] +25-08-31 04:46:12 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:46:12 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:46:12 | D | - sum error = [ 8127.6011, 8104.3776, 8120.9470, 8142.3191, 8139.4833] +25-08-31 04:46:12 | D | - best error = [ 8070.9828, 8070.9828, 8070.9828, 8070.9828, 8070.9828] +25-08-31 04:46:12 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:46:12 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:46:12 | D | - sum error = [ 8190.6268, 8259.2581, 8310.1923, 8380.7933] +25-08-31 04:46:12 | D | - best error = [ 8070.9828, 8070.9828, 8070.9828, 8070.9828] +25-08-31 04:46:12 | D | + error = 8070.9828 +25-08-31 04:46:12 | D | + scale = [min=0.3281, max=3.8772] +25-08-31 04:46:12 | D | - transformer_blocks.10.ff.down_proj +25-08-31 04:46:12 | D | + w: sfp4_e2m1_all +25-08-31 04:46:12 | D | + x: sfp4_e2m1_all +25-08-31 04:46:12 | D | + y: None +25-08-31 04:46:12 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:46:12 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 04:46:15 | D | + x - AbsMax +25-08-31 04:46:15 | D | + x = [min=0.0000, max=16.7500] +25-08-31 04:46:15 | D | + w - AbsMax +25-08-31 04:46:15 | D | + w = [min=0.0500, max=0.8203] +25-08-31 04:46:15 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 04:46:17 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 04:48:08 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:48:08 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:48:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:48:08 | D | - sum error = [ 6581.3524, 6565.1425, 6545.3617, 6536.3020, 6523.2798] +25-08-31 04:48:08 | D | - best error = [ 6581.3524, 6565.1425, 6545.3617, 6536.3020, 6523.2798] +25-08-31 04:48:08 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:48:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:48:08 | D | - sum error = [ 6518.4372, 6516.9186, 6514.8664, 6522.7666, 6531.0162] +25-08-31 04:48:08 | D | - best error = [ 6518.4372, 6516.9186, 6514.8664, 6514.8664, 6514.8664] +25-08-31 04:48:08 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:48:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:48:08 | D | - sum error = [ 6543.4361, 6555.9365, 6561.8936, 6583.1661, 6592.3465] +25-08-31 04:48:08 | D | - best error = [ 6514.8664, 6514.8664, 6514.8664, 6514.8664, 6514.8664] +25-08-31 04:48:08 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:48:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:48:08 | D | - sum error = [ 6618.0051, 6648.4660, 6682.2038, 6726.4481, 6750.3976] +25-08-31 04:48:08 | D | - best error = [ 6514.8664, 6514.8664, 6514.8664, 6514.8664, 6514.8664] +25-08-31 04:48:08 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:48:08 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:48:08 | D | - sum error = [ 6847.6761, 6795.3025, 6734.6878, 6696.1272, 6671.4337] +25-08-31 04:48:08 | D | - best error = [ 6514.8664, 6514.8664, 6514.8664, 6514.8664, 6514.8664] +25-08-31 04:48:08 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:48:08 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:48:08 | D | - sum error = [ 6629.7765, 6600.1970, 6561.4730, 6542.2268, 6528.0852] +25-08-31 04:48:08 | D | - best error = [ 6514.8664, 6514.8664, 6514.8664, 6514.8664, 6514.8664] +25-08-31 04:48:08 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:48:08 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:48:08 | D | - sum error = [ 6521.2897, 6528.9607, 6525.6816, 6543.7955, 6568.3194] +25-08-31 04:48:08 | D | - best error = [ 6514.8664, 6514.8664, 6514.8664, 6514.8664, 6514.8664] +25-08-31 04:48:08 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:48:08 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:48:08 | D | - sum error = [ 6599.7024, 6673.9182, 6689.7779, 6741.3405] +25-08-31 04:48:08 | D | - best error = [ 6514.8664, 6514.8664, 6514.8664, 6514.8664] +25-08-31 04:48:08 | D | + error = 6514.8664 +25-08-31 04:48:08 | D | + scale = [min=0.0041, max=2.6817] +25-08-31 04:48:08 | D | - transformer_blocks.10.ff_context.up_proj +25-08-31 04:48:08 | D | + w: sfp4_e2m1_all +25-08-31 04:48:08 | D | + x: sfp4_e2m1_all +25-08-31 04:48:08 | D | + y: None +25-08-31 04:48:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:48:08 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 04:48:08 | D | + x - AbsMax +25-08-31 04:48:08 | D | + x = [min=0.1758, max=80.5000] +25-08-31 04:48:08 | D | + w - AbsMax +25-08-31 04:48:08 | D | + w = [min=0.1250, max=0.4512] +25-08-31 04:48:08 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 04:48:09 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 04:48:52 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:48:52 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:48:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:48:52 | D | - sum error = [ 4478.8204, 4318.8950, 4151.7724, 3971.2492, 3828.5069] +25-08-31 04:48:52 | D | - best error = [ 4478.8204, 4318.8950, 4151.7724, 3971.2492, 3828.5069] +25-08-31 04:48:52 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:48:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:48:52 | D | - sum error = [ 3638.1209, 3547.9247, 3416.3782, 3290.3287, 3194.3034] +25-08-31 04:48:52 | D | - best error = [ 3638.1209, 3547.9247, 3416.3782, 3290.3287, 3194.3034] +25-08-31 04:48:52 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:48:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:48:52 | D | - sum error = [ 3118.6013, 3034.7634, 2998.3776, 2940.2575, 2906.3804] +25-08-31 04:48:52 | D | - best error = [ 3118.6013, 3034.7634, 2998.3776, 2940.2575, 2906.3804] +25-08-31 04:48:52 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:48:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:48:52 | D | - sum error = [ 2904.8298, 2905.5316, 2896.4635, 2910.4213, 2943.0776] +25-08-31 04:48:52 | D | - best error = [ 2904.8298, 2904.8298, 2896.4635, 2896.4635, 2896.4635] +25-08-31 04:48:52 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:48:52 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:48:52 | D | - sum error = [ 4984.5939, 4709.7563, 4513.9338, 4282.2127, 4053.3904] +25-08-31 04:48:52 | D | - best error = [ 2896.4635, 2896.4635, 2896.4635, 2896.4635, 2896.4635] +25-08-31 04:48:52 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:48:52 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:48:52 | D | - sum error = [ 3871.8915, 3733.6233, 3571.6131, 3430.1162, 3319.0592] +25-08-31 04:48:52 | D | - best error = [ 2896.4635, 2896.4635, 2896.4635, 2896.4635, 2896.4635] +25-08-31 04:48:52 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:48:52 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:48:52 | D | - sum error = [ 3212.4844, 3091.0230, 3019.5531, 2954.3233, 2919.3446] +25-08-31 04:48:52 | D | - best error = [ 2896.4635, 2896.4635, 2896.4635, 2896.4635, 2896.4635] +25-08-31 04:48:52 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:48:52 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:48:52 | D | - sum error = [ 2911.6250, 2902.5332, 2927.2923, 2948.6421] +25-08-31 04:48:52 | D | - best error = [ 2896.4635, 2896.4635, 2896.4635, 2896.4635] +25-08-31 04:48:52 | D | + error = 2896.4635 +25-08-31 04:48:52 | D | + scale = [min=0.2282, max=41.6799] +25-08-31 04:48:52 | D | - transformer_blocks.10.ff_context.down_proj +25-08-31 04:48:52 | D | + w: sfp4_e2m1_all +25-08-31 04:48:52 | D | + x: sfp4_e2m1_all +25-08-31 04:48:52 | D | + y: None +25-08-31 04:48:52 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:48:52 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 04:48:52 | D | + x - AbsMax +25-08-31 04:48:52 | D | + x = [min=0.0000, max=45.7500] +25-08-31 04:48:52 | D | + w - AbsMax +25-08-31 04:48:52 | D | + w = [min=0.0233, max=0.7188] +25-08-31 04:48:52 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 04:48:53 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 04:49:40 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:49:40 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:49:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:49:40 | D | - sum error = [ 1806.0008, 1724.6832, 1685.2332, 1619.0020, 1602.5349] +25-08-31 04:49:40 | D | - best error = [ 1806.0008, 1724.6832, 1685.2332, 1619.0020, 1602.5349] +25-08-31 04:49:40 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:49:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:49:40 | D | - sum error = [ 1585.5211, 1562.5100, 1530.4797, 1539.2158, 1526.4373] +25-08-31 04:49:40 | D | - best error = [ 1585.5211, 1562.5100, 1530.4797, 1530.4797, 1526.4373] +25-08-31 04:49:40 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:49:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:49:40 | D | - sum error = [ 1505.6947, 1492.6090, 1502.1365, 1511.8847, 1498.6189] +25-08-31 04:49:40 | D | - best error = [ 1505.6947, 1492.6090, 1492.6090, 1492.6090, 1492.6090] +25-08-31 04:49:40 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:49:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:49:40 | D | - sum error = [ 1492.3551, 1494.7586, 1514.1715, 1530.8720, 1529.0561] +25-08-31 04:49:40 | D | - best error = [ 1492.3551, 1492.3551, 1492.3551, 1492.3551, 1492.3551] +25-08-31 04:49:40 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:49:40 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:49:40 | D | - sum error = [ 1904.4638, 1871.2342, 1776.1734, 1710.8756, 1646.2539] +25-08-31 04:49:40 | D | - best error = [ 1492.3551, 1492.3551, 1492.3551, 1492.3551, 1492.3551] +25-08-31 04:49:40 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:49:40 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:49:40 | D | - sum error = [ 1630.8707, 1585.8878, 1607.2860, 1567.6662, 1562.0515] +25-08-31 04:49:40 | D | - best error = [ 1492.3551, 1492.3551, 1492.3551, 1492.3551, 1492.3551] +25-08-31 04:49:40 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:49:40 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:49:40 | D | - sum error = [ 1542.0582, 1536.7614, 1525.9391, 1525.1349, 1517.5071] +25-08-31 04:49:40 | D | - best error = [ 1492.3551, 1492.3551, 1492.3551, 1492.3551, 1492.3551] +25-08-31 04:49:40 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:49:40 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:49:40 | D | - sum error = [ 1535.2795, 1527.9285, 1535.3312, 1529.2594] +25-08-31 04:49:40 | D | - best error = [ 1492.3551, 1492.3551, 1492.3551, 1492.3551] +25-08-31 04:49:40 | D | + error = 1492.3551 +25-08-31 04:49:40 | D | + scale = [min=0.0000, max=17.5911] +25-08-31 04:49:59 | D | - Smoothing Diffusion Block transformer_blocks.11 +25-08-31 04:49:59 | D | - Skipping Module transformer_blocks.11.norm1.linear +25-08-31 04:49:59 | D | - Skipping Module transformer_blocks.11.norm1_context.linear +25-08-31 04:49:59 | D | - Smoothing Transformer Block transformer_blocks.11 +25-08-31 04:49:59 | D | - transformer_blocks.11.attn.qkv_proj +25-08-31 04:49:59 | D | + w: sfp4_e2m1_all +25-08-31 04:49:59 | D | + x: sfp4_e2m1_all +25-08-31 04:49:59 | D | + y: None +25-08-31 04:49:59 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:49:59 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 04:49:59 | D | + x - AbsMax +25-08-31 04:49:59 | D | + x = [min=0.1123, max=16.8750] +25-08-31 04:49:59 | D | + w - AbsMax +25-08-31 04:49:59 | D | + w = [min=0.1089, max=0.5273] +25-08-31 04:49:59 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 04:50:00 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 04:51:13 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:51:13 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:51:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:51:13 | D | - sum error = [ 5180.1822, 5122.7375, 4983.5861, 4861.9591, 4868.7448] +25-08-31 04:51:13 | D | - best error = [ 5180.1822, 5122.7375, 4983.5861, 4861.9591, 4861.9591] +25-08-31 04:51:13 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:51:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:51:13 | D | - sum error = [ 4710.9897, 4662.4343, 4626.9746, 4743.6918, 4677.7772] +25-08-31 04:51:13 | D | - best error = [ 4710.9897, 4662.4343, 4626.9746, 4626.9746, 4626.9746] +25-08-31 04:51:13 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:51:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:51:13 | D | - sum error = [ 4590.1887, 4660.0558, 4556.6837, 4611.9059, 4629.6648] +25-08-31 04:51:13 | D | - best error = [ 4590.1887, 4590.1887, 4556.6837, 4556.6837, 4556.6837] +25-08-31 04:51:13 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:51:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:51:13 | D | - sum error = [ 4625.7103, 4725.6940, 4810.1048, 4837.8687, 4917.3824] +25-08-31 04:51:13 | D | - best error = [ 4556.6837, 4556.6837, 4556.6837, 4556.6837, 4556.6837] +25-08-31 04:51:13 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:51:13 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:51:13 | D | - sum error = [ 6838.2261, 6516.4042, 6240.9855, 5956.1504, 5664.9872] +25-08-31 04:51:13 | D | - best error = [ 4556.6837, 4556.6837, 4556.6837, 4556.6837, 4556.6837] +25-08-31 04:51:13 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:51:13 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:51:13 | D | - sum error = [ 5553.7245, 5415.9333, 5269.8017, 5081.4285, 5060.0308] +25-08-31 04:51:13 | D | - best error = [ 4556.6837, 4556.6837, 4556.6837, 4556.6837, 4556.6837] +25-08-31 04:51:13 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:51:13 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:51:13 | D | - sum error = [ 4962.7392, 4878.4910, 4845.9901, 4826.0116, 4891.1191] +25-08-31 04:51:13 | D | - best error = [ 4556.6837, 4556.6837, 4556.6837, 4556.6837, 4556.6837] +25-08-31 04:51:13 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:51:13 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:51:13 | D | - sum error = [ 4805.4301, 4883.3368, 4908.7976, 4954.7525] +25-08-31 04:51:13 | D | - best error = [ 4556.6837, 4556.6837, 4556.6837, 4556.6837] +25-08-31 04:51:13 | D | + error = 4556.6837 +25-08-31 04:51:13 | D | + scale = [min=0.2693, max=5.4494] +25-08-31 04:51:13 | D | - transformer_blocks.11.attn add_qkv_proj +25-08-31 04:51:13 | D | + w: sfp4_e2m1_all +25-08-31 04:51:13 | D | + x: sfp4_e2m1_all +25-08-31 04:51:13 | D | + y: None +25-08-31 04:51:13 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:51:13 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 04:51:13 | D | + x - AbsMax +25-08-31 04:51:13 | D | + x = [min=0.1738, max=29.0000] +25-08-31 04:51:13 | D | + w - AbsMax +25-08-31 04:51:13 | D | + w = [min=0.1099, max=0.7109] +25-08-31 04:51:13 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 04:51:14 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 04:52:17 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:52:17 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:52:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:52:17 | D | - sum error = [ 1378.0448, 1351.5946, 1364.8716, 1342.0510, 1290.5204] +25-08-31 04:52:17 | D | - best error = [ 1378.0448, 1351.5946, 1351.5946, 1342.0510, 1290.5204] +25-08-31 04:52:17 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:52:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:52:17 | D | - sum error = [ 1268.7533, 1270.9919, 1238.0930, 1228.2777, 1201.9801] +25-08-31 04:52:17 | D | - best error = [ 1268.7533, 1268.7533, 1238.0930, 1228.2777, 1201.9801] +25-08-31 04:52:17 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:52:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:52:17 | D | - sum error = [ 1212.4722, 1213.3528, 1217.8009, 1227.9077, 1237.8155] +25-08-31 04:52:17 | D | - best error = [ 1201.9801, 1201.9801, 1201.9801, 1201.9801, 1201.9801] +25-08-31 04:52:17 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:52:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:52:17 | D | - sum error = [ 1206.2948, 1211.6262, 1228.9905, 1256.5124, 1256.8463] +25-08-31 04:52:17 | D | - best error = [ 1201.9801, 1201.9801, 1201.9801, 1201.9801, 1201.9801] +25-08-31 04:52:17 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:52:17 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:52:17 | D | - sum error = [ 1669.8316, 1574.9443, 1535.2359, 1468.6415, 1407.6931] +25-08-31 04:52:17 | D | - best error = [ 1201.9801, 1201.9801, 1201.9801, 1201.9801, 1201.9801] +25-08-31 04:52:17 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:52:17 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:52:17 | D | - sum error = [ 1406.8603, 1338.2448, 1328.4010, 1266.9259, 1283.4920] +25-08-31 04:52:17 | D | - best error = [ 1201.9801, 1201.9801, 1201.9801, 1201.9801, 1201.9801] +25-08-31 04:52:17 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:52:17 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:52:17 | D | - sum error = [ 1266.9261, 1257.9274, 1245.1122, 1259.1868, 1246.6286] +25-08-31 04:52:17 | D | - best error = [ 1201.9801, 1201.9801, 1201.9801, 1201.9801, 1201.9801] +25-08-31 04:52:17 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:52:17 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:52:17 | D | - sum error = [ 1257.8944, 1250.5425, 1257.1436, 1273.0815] +25-08-31 04:52:17 | D | - best error = [ 1201.9801, 1201.9801, 1201.9801, 1201.9801] +25-08-31 04:52:17 | D | + error = 1201.9801 +25-08-31 04:52:17 | D | + scale = [min=0.4550, max=4.5507] +25-08-31 04:52:17 | D | - transformer_blocks.11.attn.out_proj + transformer_blocks.11.attn.add_out_proj +25-08-31 04:52:17 | D | + w: sfp4_e2m1_all +25-08-31 04:52:17 | D | + x: sfp4_e2m1_all +25-08-31 04:52:17 | D | + y: None +25-08-31 04:52:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:52:17 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 04:52:18 | D | + x - AbsMax +25-08-31 04:52:18 | D | + x = [min=2.4062, max=19.8750] +25-08-31 04:52:18 | D | + w - AbsMax +25-08-31 04:52:18 | D | + w = [min=0.1245, max=0.3652] +25-08-31 04:52:18 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 04:52:19 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 04:54:04 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:54:04 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:54:04 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:54:04 | D | - sum error = [ 8471.1794, 8476.9438, 8470.8436, 8464.1474, 8464.0123] +25-08-31 04:54:04 | D | - best error = [ 8471.1794, 8471.1794, 8470.8436, 8464.1474, 8464.0123] +25-08-31 04:54:04 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:54:04 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:54:04 | D | - sum error = [ 8507.4367, 8500.0696, 8484.3857, 8496.8738, 8528.3738] +25-08-31 04:54:04 | D | - best error = [ 8464.0123, 8464.0123, 8464.0123, 8464.0123, 8464.0123] +25-08-31 04:54:04 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:54:04 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:54:04 | D | - sum error = [ 8511.5799, 8527.1519, 8554.3229, 8562.2007, 8593.4570] +25-08-31 04:54:04 | D | - best error = [ 8464.0123, 8464.0123, 8464.0123, 8464.0123, 8464.0123] +25-08-31 04:54:04 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:54:04 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:54:04 | D | - sum error = [ 8603.4593, 8621.6161, 8633.5077, 8673.9606, 8688.3494] +25-08-31 04:54:04 | D | - best error = [ 8464.0123, 8464.0123, 8464.0123, 8464.0123, 8464.0123] +25-08-31 04:54:04 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:54:04 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:54:04 | D | - sum error = [ 8621.4774, 8628.5322, 8615.8733, 8582.9543, 8575.8363] +25-08-31 04:54:04 | D | - best error = [ 8464.0123, 8464.0123, 8464.0123, 8464.0123, 8464.0123] +25-08-31 04:54:04 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:54:04 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:54:04 | D | - sum error = [ 8560.7701, 8571.4806, 8554.2630, 8559.0004, 8570.5239] +25-08-31 04:54:04 | D | - best error = [ 8464.0123, 8464.0123, 8464.0123, 8464.0123, 8464.0123] +25-08-31 04:54:04 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:54:04 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:54:04 | D | - sum error = [ 8568.4780, 8583.1113, 8590.0328, 8623.6187, 8623.7421] +25-08-31 04:54:04 | D | - best error = [ 8464.0123, 8464.0123, 8464.0123, 8464.0123, 8464.0123] +25-08-31 04:54:04 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:54:04 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:54:04 | D | - sum error = [ 8644.8135, 8644.4896, 8664.6860, 8710.2060] +25-08-31 04:54:04 | D | - best error = [ 8464.0123, 8464.0123, 8464.0123, 8464.0123] +25-08-31 04:54:04 | D | + error = 8464.0123 +25-08-31 04:54:04 | D | + scale = [min=1.1920, max=1.8183] +25-08-31 04:54:05 | D | - transformer_blocks.11.ff.up_proj +25-08-31 04:54:05 | D | + w: sfp4_e2m1_all +25-08-31 04:54:05 | D | + x: sfp4_e2m1_all +25-08-31 04:54:05 | D | + y: None +25-08-31 04:54:05 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:54:05 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 04:54:05 | D | + x - AbsMax +25-08-31 04:54:05 | D | + x = [min=0.1162, max=10.6875] +25-08-31 04:54:05 | D | + w - AbsMax +25-08-31 04:54:05 | D | + w = [min=0.1123, max=0.4980] +25-08-31 04:54:05 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 04:54:06 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 04:55:21 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:55:21 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:55:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:55:21 | D | - sum error = [ 8570.4996, 8478.4664, 8399.6724, 8322.4146, 8263.0087] +25-08-31 04:55:21 | D | - best error = [ 8570.4996, 8478.4664, 8399.6724, 8322.4146, 8263.0087] +25-08-31 04:55:21 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:55:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:55:21 | D | - sum error = [ 8218.0981, 8169.0127, 8130.4964, 8107.8777, 8082.1091] +25-08-31 04:55:21 | D | - best error = [ 8218.0981, 8169.0127, 8130.4964, 8107.8777, 8082.1091] +25-08-31 04:55:21 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:55:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:55:21 | D | - sum error = [ 8070.1715, 8062.0414, 8070.4151, 8094.2478, 8111.1797] +25-08-31 04:55:21 | D | - best error = [ 8070.1715, 8062.0414, 8062.0414, 8062.0414, 8062.0414] +25-08-31 04:55:21 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:55:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:55:21 | D | - sum error = [ 8141.9086, 8175.4334, 8225.6564, 8262.2099, 8327.6273] +25-08-31 04:55:21 | D | - best error = [ 8062.0414, 8062.0414, 8062.0414, 8062.0414, 8062.0414] +25-08-31 04:55:21 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:55:21 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:55:21 | D | - sum error = [ 8814.4808, 8687.3711, 8588.2841, 8471.5094, 8374.2782] +25-08-31 04:55:21 | D | - best error = [ 8062.0414, 8062.0414, 8062.0414, 8062.0414, 8062.0414] +25-08-31 04:55:21 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:55:21 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:55:21 | D | - sum error = [ 8311.7642, 8234.3505, 8199.2195, 8149.4756, 8130.3781] +25-08-31 04:55:21 | D | - best error = [ 8062.0414, 8062.0414, 8062.0414, 8062.0414, 8062.0414] +25-08-31 04:55:21 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:55:21 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:55:21 | D | - sum error = [ 8125.7854, 8105.8381, 8102.7793, 8103.1693, 8140.9535] +25-08-31 04:55:21 | D | - best error = [ 8062.0414, 8062.0414, 8062.0414, 8062.0414, 8062.0414] +25-08-31 04:55:21 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:55:21 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:55:21 | D | - sum error = [ 8177.2619, 8203.9473, 8255.4733, 8333.6796] +25-08-31 04:55:21 | D | - best error = [ 8062.0414, 8062.0414, 8062.0414, 8062.0414] +25-08-31 04:55:21 | D | + error = 8062.0414 +25-08-31 04:55:21 | D | + scale = [min=0.3061, max=3.6803] +25-08-31 04:55:21 | D | - transformer_blocks.11.ff.down_proj +25-08-31 04:55:21 | D | + w: sfp4_e2m1_all +25-08-31 04:55:21 | D | + x: sfp4_e2m1_all +25-08-31 04:55:21 | D | + y: None +25-08-31 04:55:21 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:55:21 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 04:55:23 | D | + x - AbsMax +25-08-31 04:55:23 | D | + x = [min=0.0000, max=22.1250] +25-08-31 04:55:23 | D | + w - AbsMax +25-08-31 04:55:23 | D | + w = [min=0.0532, max=0.6953] +25-08-31 04:55:23 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 04:55:25 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 04:57:12 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:57:12 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:57:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:57:12 | D | - sum error = [ 6559.3695, 6548.1191, 6541.9013, 6518.4403, 6525.6653] +25-08-31 04:57:12 | D | - best error = [ 6559.3695, 6548.1191, 6541.9013, 6518.4403, 6518.4403] +25-08-31 04:57:12 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:57:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:57:12 | D | - sum error = [ 6518.1991, 6527.4350, 6539.0736, 6539.8761, 6552.9627] +25-08-31 04:57:12 | D | - best error = [ 6518.1991, 6518.1991, 6518.1991, 6518.1991, 6518.1991] +25-08-31 04:57:12 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:57:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:57:12 | D | - sum error = [ 6574.0891, 6582.7198, 6609.5655, 6635.2320, 6649.8448] +25-08-31 04:57:12 | D | - best error = [ 6518.1991, 6518.1991, 6518.1991, 6518.1991, 6518.1991] +25-08-31 04:57:12 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:57:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:57:12 | D | - sum error = [ 6683.1130, 6726.0764, 6763.4716, 6814.6296, 6863.9630] +25-08-31 04:57:12 | D | - best error = [ 6518.1991, 6518.1991, 6518.1991, 6518.1991, 6518.1991] +25-08-31 04:57:12 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:57:12 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:57:12 | D | - sum error = [ 6843.9863, 6788.9852, 6738.2814, 6705.4253, 6658.1283] +25-08-31 04:57:12 | D | - best error = [ 6518.1991, 6518.1991, 6518.1991, 6518.1991, 6518.1991] +25-08-31 04:57:12 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:57:12 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:57:12 | D | - sum error = [ 6620.3486, 6589.5238, 6589.2221, 6558.2016, 6621.5690] +25-08-31 04:57:12 | D | - best error = [ 6518.1991, 6518.1991, 6518.1991, 6518.1991, 6518.1991] +25-08-31 04:57:12 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:57:12 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:57:12 | D | - sum error = [ 6542.4344, 6543.1421, 6546.1326, 6592.3598, 6624.4791] +25-08-31 04:57:12 | D | - best error = [ 6518.1991, 6518.1991, 6518.1991, 6518.1991, 6518.1991] +25-08-31 04:57:12 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:57:12 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:57:12 | D | - sum error = [ 6659.5237, 6704.7001, 6775.3488, 6849.1772] +25-08-31 04:57:12 | D | - best error = [ 6518.1991, 6518.1991, 6518.1991, 6518.1991] +25-08-31 04:57:12 | D | + error = 6518.1991 +25-08-31 04:57:12 | D | + scale = [min=0.0197, max=2.1688] +25-08-31 04:57:12 | D | - transformer_blocks.11.ff_context.up_proj +25-08-31 04:57:12 | D | + w: sfp4_e2m1_all +25-08-31 04:57:12 | D | + x: sfp4_e2m1_all +25-08-31 04:57:12 | D | + y: None +25-08-31 04:57:12 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:57:12 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 04:57:12 | D | + x - AbsMax +25-08-31 04:57:12 | D | + x = [min=0.2197, max=65.0000] +25-08-31 04:57:12 | D | + w - AbsMax +25-08-31 04:57:12 | D | + w = [min=0.0928, max=0.5820] +25-08-31 04:57:12 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 04:57:13 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 04:57:56 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:57:56 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:57:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:57:56 | D | - sum error = [ 5636.7725, 5476.4474, 5327.5049, 5253.2887, 5067.5300] +25-08-31 04:57:56 | D | - best error = [ 5636.7725, 5476.4474, 5327.5049, 5253.2887, 5067.5300] +25-08-31 04:57:56 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:57:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:57:56 | D | - sum error = [ 4956.6912, 4859.3216, 4804.7400, 4771.3419, 4705.7560] +25-08-31 04:57:56 | D | - best error = [ 4956.6912, 4859.3216, 4804.7400, 4771.3419, 4705.7560] +25-08-31 04:57:56 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:57:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:57:56 | D | - sum error = [ 4678.8016, 4681.5450, 4662.5712, 4649.8404, 4627.7368] +25-08-31 04:57:56 | D | - best error = [ 4678.8016, 4678.8016, 4662.5712, 4649.8404, 4627.7368] +25-08-31 04:57:56 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:57:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:57:56 | D | - sum error = [ 4642.2191, 4662.3462, 4672.1505, 4689.9923, 4695.0054] +25-08-31 04:57:56 | D | - best error = [ 4627.7368, 4627.7368, 4627.7368, 4627.7368, 4627.7368] +25-08-31 04:57:56 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:57:56 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:57:56 | D | - sum error = [ 5250.6381, 5103.6260, 4994.7264, 4917.8032, 4909.0123] +25-08-31 04:57:56 | D | - best error = [ 4627.7368, 4627.7368, 4627.7368, 4627.7368, 4627.7368] +25-08-31 04:57:56 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:57:56 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:57:56 | D | - sum error = [ 4803.4361, 4756.7918, 4715.0133, 4670.4139, 4641.7744] +25-08-31 04:57:56 | D | - best error = [ 4627.7368, 4627.7368, 4627.7368, 4627.7368, 4627.7368] +25-08-31 04:57:56 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:57:56 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:57:56 | D | - sum error = [ 4621.6266, 4597.1193, 4648.0325, 4613.0827, 4622.7001] +25-08-31 04:57:56 | D | - best error = [ 4621.6266, 4597.1193, 4597.1193, 4597.1193, 4597.1193] +25-08-31 04:57:56 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:57:56 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:57:56 | D | - sum error = [ 4638.3290, 4687.4823, 4677.2417, 4704.8452] +25-08-31 04:57:56 | D | - best error = [ 4597.1193, 4597.1193, 4597.1193, 4597.1193] +25-08-31 04:57:56 | D | + error = 4597.1193 +25-08-31 04:57:56 | D | + scale = [min=0.7729, max=28.9500] +25-08-31 04:57:56 | D | - transformer_blocks.11.ff_context.down_proj +25-08-31 04:57:56 | D | + w: sfp4_e2m1_all +25-08-31 04:57:56 | D | + x: sfp4_e2m1_all +25-08-31 04:57:56 | D | + y: None +25-08-31 04:57:56 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:57:56 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 04:57:57 | D | + x - AbsMax +25-08-31 04:57:57 | D | + x = [min=0.0000, max=98.5000] +25-08-31 04:57:57 | D | + w - AbsMax +25-08-31 04:57:57 | D | + w = [min=0.0199, max=0.4707] +25-08-31 04:57:57 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 04:57:57 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 04:58:45 | D | - x / w range = AbsMax / AbsMax +25-08-31 04:58:45 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 04:58:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:58:45 | D | - sum error = [ 1065.8762, 1028.2694, 998.9828, 975.6383, 958.1066] +25-08-31 04:58:45 | D | - best error = [ 1065.8762, 1028.2694, 998.9828, 975.6383, 958.1066] +25-08-31 04:58:45 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 04:58:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:58:45 | D | - sum error = [ 943.8820, 939.4090, 932.9957, 926.4895, 927.3878] +25-08-31 04:58:45 | D | - best error = [ 943.8820, 939.4090, 932.9957, 926.4895, 926.4895] +25-08-31 04:58:45 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 04:58:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:58:45 | D | - sum error = [ 925.2063, 927.4928, 926.9656, 932.4397, 939.7309] +25-08-31 04:58:45 | D | - best error = [ 925.2063, 925.2063, 925.2063, 925.2063, 925.2063] +25-08-31 04:58:45 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:58:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 04:58:45 | D | - sum error = [ 950.4833, 959.2544, 977.6603, 998.6238, 1024.1359] +25-08-31 04:58:45 | D | - best error = [ 925.2063, 925.2063, 925.2063, 925.2063, 925.2063] +25-08-31 04:58:45 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 04:58:45 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 04:58:45 | D | - sum error = [ 1084.0754, 1059.8362, 1036.1829, 1016.3263, 999.7537] +25-08-31 04:58:45 | D | - best error = [ 925.2063, 925.2063, 925.2063, 925.2063, 925.2063] +25-08-31 04:58:45 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 04:58:45 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 04:58:45 | D | - sum error = [ 991.0629, 983.4827, 975.9274, 977.0035, 976.6201] +25-08-31 04:58:45 | D | - best error = [ 925.2063, 925.2063, 925.2063, 925.2063, 925.2063] +25-08-31 04:58:45 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 04:58:45 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 04:58:45 | D | - sum error = [ 976.4339, 976.6844, 981.9113, 984.4303, 993.8093] +25-08-31 04:58:45 | D | - best error = [ 925.2063, 925.2063, 925.2063, 925.2063, 925.2063] +25-08-31 04:58:45 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 04:58:45 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 04:58:45 | D | - sum error = [ 1002.1945, 1008.8527, 1020.3758, 1033.3527] +25-08-31 04:58:45 | D | - best error = [ 925.2063, 925.2063, 925.2063, 925.2063] +25-08-31 04:58:45 | D | + error = 925.2063 +25-08-31 04:58:45 | D | + scale = [min=0.0004, max=9.9247] +25-08-31 04:59:04 | D | - Smoothing Diffusion Block transformer_blocks.12 +25-08-31 04:59:04 | D | - Skipping Module transformer_blocks.12.norm1.linear +25-08-31 04:59:04 | D | - Skipping Module transformer_blocks.12.norm1_context.linear +25-08-31 04:59:04 | D | - Smoothing Transformer Block transformer_blocks.12 +25-08-31 04:59:04 | D | - transformer_blocks.12.attn.qkv_proj +25-08-31 04:59:04 | D | + w: sfp4_e2m1_all +25-08-31 04:59:04 | D | + x: sfp4_e2m1_all +25-08-31 04:59:04 | D | + y: None +25-08-31 04:59:04 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 04:59:04 | D | + finished parsing calibration arguments, ram usage: 14.6 +25-08-31 04:59:04 | D | + x - AbsMax +25-08-31 04:59:04 | D | + x = [min=0.0898, max=13.3125] +25-08-31 04:59:04 | D | + w - AbsMax +25-08-31 04:59:04 | D | + w = [min=0.1118, max=0.4609] +25-08-31 04:59:04 | D | + finished resetting calibrator, ram usage: 14.6 +25-08-31 04:59:05 | D | + finished calculating the original outputs, ram usage: 14.6 +25-08-31 05:00:19 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:00:19 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:00:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:00:19 | D | - sum error = [ 5725.1524, 5651.4584, 5541.4821, 5471.2185, 5340.5758] +25-08-31 05:00:19 | D | - best error = [ 5725.1524, 5651.4584, 5541.4821, 5471.2185, 5340.5758] +25-08-31 05:00:19 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:00:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:00:19 | D | - sum error = [ 5312.8098, 5229.1896, 5176.8288, 5217.3367, 5164.2324] +25-08-31 05:00:19 | D | - best error = [ 5312.8098, 5229.1896, 5176.8288, 5176.8288, 5164.2324] +25-08-31 05:00:19 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:00:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:00:19 | D | - sum error = [ 5118.5717, 5115.1733, 5187.6912, 5150.3977, 5207.3179] +25-08-31 05:00:19 | D | - best error = [ 5118.5717, 5115.1733, 5115.1733, 5115.1733, 5115.1733] +25-08-31 05:00:19 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:00:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:00:19 | D | - sum error = [ 5219.1902, 5210.5305, 5315.2594, 5371.2585, 5463.1906] +25-08-31 05:00:19 | D | - best error = [ 5115.1733, 5115.1733, 5115.1733, 5115.1733, 5115.1733] +25-08-31 05:00:19 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:00:19 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:00:19 | D | - sum error = [ 7055.8010, 6821.1984, 6636.8453, 6309.2475, 6085.4193] +25-08-31 05:00:19 | D | - best error = [ 5115.1733, 5115.1733, 5115.1733, 5115.1733, 5115.1733] +25-08-31 05:00:19 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:00:19 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:00:19 | D | - sum error = [ 5951.8925, 5801.0992, 5655.7885, 5522.7064, 5480.4027] +25-08-31 05:00:19 | D | - best error = [ 5115.1733, 5115.1733, 5115.1733, 5115.1733, 5115.1733] +25-08-31 05:00:19 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:00:19 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:00:19 | D | - sum error = [ 5399.0184, 5400.9483, 5327.2451, 5331.7558, 5313.8212] +25-08-31 05:00:19 | D | - best error = [ 5115.1733, 5115.1733, 5115.1733, 5115.1733, 5115.1733] +25-08-31 05:00:19 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:00:19 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:00:19 | D | - sum error = [ 5382.1096, 5371.3475, 5369.9024, 5477.7901] +25-08-31 05:00:19 | D | - best error = [ 5115.1733, 5115.1733, 5115.1733, 5115.1733] +25-08-31 05:00:19 | D | + error = 5115.1733 +25-08-31 05:00:19 | D | + scale = [min=0.2657, max=4.1528] +25-08-31 05:00:19 | D | - transformer_blocks.12.attn add_qkv_proj +25-08-31 05:00:19 | D | + w: sfp4_e2m1_all +25-08-31 05:00:19 | D | + x: sfp4_e2m1_all +25-08-31 05:00:19 | D | + y: None +25-08-31 05:00:19 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:00:19 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 05:00:19 | D | + x - AbsMax +25-08-31 05:00:19 | D | + x = [min=0.1494, max=29.3750] +25-08-31 05:00:19 | D | + w - AbsMax +25-08-31 05:00:19 | D | + w = [min=0.1309, max=0.5391] +25-08-31 05:00:19 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 05:00:20 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 05:01:27 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:01:27 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:01:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:01:27 | D | - sum error = [ 1219.9834, 1232.5623, 1203.9672, 1203.7555, 1191.3319] +25-08-31 05:01:27 | D | - best error = [ 1219.9834, 1219.9834, 1203.9672, 1203.7555, 1191.3319] +25-08-31 05:01:27 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:01:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:01:27 | D | - sum error = [ 1203.0443, 1197.1034, 1179.7379, 1171.9735, 1170.5942] +25-08-31 05:01:27 | D | - best error = [ 1191.3319, 1191.3319, 1179.7379, 1171.9735, 1170.5942] +25-08-31 05:01:27 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:01:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:01:27 | D | - sum error = [ 1179.2215, 1187.5179, 1188.6182, 1208.2342, 1198.8308] +25-08-31 05:01:27 | D | - best error = [ 1170.5942, 1170.5942, 1170.5942, 1170.5942, 1170.5942] +25-08-31 05:01:27 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:01:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:01:27 | D | - sum error = [ 1212.6030, 1214.5994, 1214.3817, 1247.3742, 1257.6378] +25-08-31 05:01:27 | D | - best error = [ 1170.5942, 1170.5942, 1170.5942, 1170.5942, 1170.5942] +25-08-31 05:01:27 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:01:27 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:01:27 | D | - sum error = [ 1404.1473, 1383.1813, 1334.4940, 1327.2062, 1289.9335] +25-08-31 05:01:27 | D | - best error = [ 1170.5942, 1170.5942, 1170.5942, 1170.5942, 1170.5942] +25-08-31 05:01:27 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:01:27 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:01:27 | D | - sum error = [ 1285.5766, 1278.8968, 1266.3561, 1234.0652, 1237.8761] +25-08-31 05:01:27 | D | - best error = [ 1170.5942, 1170.5942, 1170.5942, 1170.5942, 1170.5942] +25-08-31 05:01:27 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:01:27 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:01:27 | D | - sum error = [ 1264.2349, 1230.7582, 1236.1361, 1236.0955, 1206.3799] +25-08-31 05:01:27 | D | - best error = [ 1170.5942, 1170.5942, 1170.5942, 1170.5942, 1170.5942] +25-08-31 05:01:27 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:01:27 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:01:27 | D | - sum error = [ 1254.3860, 1232.5107, 1257.3019, 1229.6212] +25-08-31 05:01:27 | D | - best error = [ 1170.5942, 1170.5942, 1170.5942, 1170.5942] +25-08-31 05:01:27 | D | + error = 1170.5942 +25-08-31 05:01:27 | D | + scale = [min=0.4251, max=4.5771] +25-08-31 05:01:27 | D | - transformer_blocks.12.attn.out_proj + transformer_blocks.12.attn.add_out_proj +25-08-31 05:01:27 | D | + w: sfp4_e2m1_all +25-08-31 05:01:27 | D | + x: sfp4_e2m1_all +25-08-31 05:01:27 | D | + y: None +25-08-31 05:01:27 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:01:27 | D | + finished parsing calibration arguments, ram usage: 15.2 +25-08-31 05:01:28 | D | + x - AbsMax +25-08-31 05:01:28 | D | + x = [min=3.8906, max=28.7500] +25-08-31 05:01:28 | D | + w - AbsMax +25-08-31 05:01:28 | D | + w = [min=0.1279, max=0.3242] +25-08-31 05:01:28 | D | + finished resetting calibrator, ram usage: 15.2 +25-08-31 05:01:29 | D | + finished calculating the original outputs, ram usage: 15.2 +25-08-31 05:03:16 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:03:16 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:03:16 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:03:16 | D | - sum error = [ 9005.7013, 8979.3657, 9000.1636, 8999.5182, 9019.9596] +25-08-31 05:03:16 | D | - best error = [ 9005.7013, 8979.3657, 8979.3657, 8979.3657, 8979.3657] +25-08-31 05:03:16 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:03:16 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:03:16 | D | - sum error = [ 9001.7727, 9017.6037, 9005.9847, 9038.3282, 9033.1111] +25-08-31 05:03:16 | D | - best error = [ 8979.3657, 8979.3657, 8979.3657, 8979.3657, 8979.3657] +25-08-31 05:03:16 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:03:16 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:03:16 | D | - sum error = [ 9048.8191, 9086.7662, 9092.0150, 9109.6590, 9130.7805] +25-08-31 05:03:16 | D | - best error = [ 8979.3657, 8979.3657, 8979.3657, 8979.3657, 8979.3657] +25-08-31 05:03:16 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:03:16 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:03:16 | D | - sum error = [ 9156.3622, 9180.9882, 9214.2281, 9241.2250, 9284.7173] +25-08-31 05:03:16 | D | - best error = [ 8979.3657, 8979.3657, 8979.3657, 8979.3657, 8979.3657] +25-08-31 05:03:16 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:03:16 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:03:16 | D | - sum error = [ 9116.0193, 9079.4625, 9071.9815, 9099.8224, 9073.2468] +25-08-31 05:03:16 | D | - best error = [ 8979.3657, 8979.3657, 8979.3657, 8979.3657, 8979.3657] +25-08-31 05:03:16 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:03:16 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:03:16 | D | - sum error = [ 9064.7264, 9066.1277, 9064.9889, 9064.3901, 9093.7935] +25-08-31 05:03:16 | D | - best error = [ 8979.3657, 8979.3657, 8979.3657, 8979.3657, 8979.3657] +25-08-31 05:03:16 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:03:16 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:03:16 | D | - sum error = [ 9089.9729, 9111.9868, 9141.2524, 9153.1026, 9157.6342] +25-08-31 05:03:16 | D | - best error = [ 8979.3657, 8979.3657, 8979.3657, 8979.3657, 8979.3657] +25-08-31 05:03:16 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:03:16 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:03:16 | D | - sum error = [ 9179.4597, 9201.6090, 9233.0680, 9277.4725] +25-08-31 05:03:16 | D | - best error = [ 8979.3657, 8979.3657, 8979.3657, 8979.3657] +25-08-31 05:03:16 | D | + error = 8979.3657 +25-08-31 05:03:16 | D | + scale = [min=1.0703, max=1.1829] +25-08-31 05:03:17 | D | - transformer_blocks.12.ff.up_proj +25-08-31 05:03:17 | D | + w: sfp4_e2m1_all +25-08-31 05:03:17 | D | + x: sfp4_e2m1_all +25-08-31 05:03:17 | D | + y: None +25-08-31 05:03:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:03:17 | D | + finished parsing calibration arguments, ram usage: 15.2 +25-08-31 05:03:17 | D | + x - AbsMax +25-08-31 05:03:17 | D | + x = [min=0.1455, max=8.0000] +25-08-31 05:03:17 | D | + w - AbsMax +25-08-31 05:03:17 | D | + w = [min=0.0933, max=0.5938] +25-08-31 05:03:17 | D | + finished resetting calibrator, ram usage: 15.2 +25-08-31 05:03:18 | D | + finished calculating the original outputs, ram usage: 15.2 +25-08-31 05:04:37 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:04:37 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:04:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:04:37 | D | - sum error = [ 8789.5424, 8708.3050, 8615.8779, 8552.8470, 8476.2487] +25-08-31 05:04:37 | D | - best error = [ 8789.5424, 8708.3050, 8615.8779, 8552.8470, 8476.2487] +25-08-31 05:04:37 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:04:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:04:37 | D | - sum error = [ 8430.0932, 8386.7286, 8326.3807, 8339.5635, 8301.6591] +25-08-31 05:04:37 | D | - best error = [ 8430.0932, 8386.7286, 8326.3807, 8326.3807, 8301.6591] +25-08-31 05:04:37 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:04:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:04:37 | D | - sum error = [ 8297.7693, 8303.8324, 8296.0439, 8315.5204, 8330.9034] +25-08-31 05:04:37 | D | - best error = [ 8297.7693, 8297.7693, 8296.0439, 8296.0439, 8296.0439] +25-08-31 05:04:37 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:04:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:04:37 | D | - sum error = [ 8365.5494, 8400.5324, 8453.7850, 8504.6411, 8573.8080] +25-08-31 05:04:37 | D | - best error = [ 8296.0439, 8296.0439, 8296.0439, 8296.0439, 8296.0439] +25-08-31 05:04:37 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:04:37 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:04:37 | D | - sum error = [ 8934.2066, 8822.9161, 8728.7117, 8615.0352, 8549.6681] +25-08-31 05:04:37 | D | - best error = [ 8296.0439, 8296.0439, 8296.0439, 8296.0439, 8296.0439] +25-08-31 05:04:37 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:04:37 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:04:37 | D | - sum error = [ 8483.9571, 8431.9843, 8383.7195, 8356.0263, 8336.9318] +25-08-31 05:04:37 | D | - best error = [ 8296.0439, 8296.0439, 8296.0439, 8296.0439, 8296.0439] +25-08-31 05:04:37 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:04:37 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:04:37 | D | - sum error = [ 8321.9959, 8317.7531, 8327.5740, 8348.4480, 8349.2400] +25-08-31 05:04:37 | D | - best error = [ 8296.0439, 8296.0439, 8296.0439, 8296.0439, 8296.0439] +25-08-31 05:04:37 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:04:37 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:04:37 | D | - sum error = [ 8402.5644, 8437.2811, 8496.6455, 8565.1975] +25-08-31 05:04:37 | D | - best error = [ 8296.0439, 8296.0439, 8296.0439, 8296.0439] +25-08-31 05:04:37 | D | + error = 8296.0439 +25-08-31 05:04:37 | D | + scale = [min=0.3146, max=3.4822] +25-08-31 05:04:37 | D | - transformer_blocks.12.ff.down_proj +25-08-31 05:04:37 | D | + w: sfp4_e2m1_all +25-08-31 05:04:37 | D | + x: sfp4_e2m1_all +25-08-31 05:04:37 | D | + y: None +25-08-31 05:04:37 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:04:37 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:04:40 | D | + x - AbsMax +25-08-31 05:04:40 | D | + x = [min=0.0000, max=26.3750] +25-08-31 05:04:40 | D | + w - AbsMax +25-08-31 05:04:40 | D | + w = [min=0.0469, max=0.8320] +25-08-31 05:04:40 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:04:42 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:06:38 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:06:38 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:06:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:06:38 | D | - sum error = [ 6622.3283, 6627.7655, 6692.3972, 6612.8735, 6612.4706] +25-08-31 05:06:38 | D | - best error = [ 6622.3283, 6622.3283, 6622.3283, 6612.8735, 6612.4706] +25-08-31 05:06:38 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:06:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:06:38 | D | - sum error = [ 6617.4252, 6622.0396, 6636.2031, 6640.6364, 6663.4300] +25-08-31 05:06:38 | D | - best error = [ 6612.4706, 6612.4706, 6612.4706, 6612.4706, 6612.4706] +25-08-31 05:06:38 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:06:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:06:38 | D | - sum error = [ 6745.4963, 6705.1819, 6727.9759, 6756.6592, 6789.7211] +25-08-31 05:06:38 | D | - best error = [ 6612.4706, 6612.4706, 6612.4706, 6612.4706, 6612.4706] +25-08-31 05:06:38 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:06:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:06:38 | D | - sum error = [ 6843.7154, 6878.0960, 6926.1022, 6977.9917, 7040.4119] +25-08-31 05:06:38 | D | - best error = [ 6612.4706, 6612.4706, 6612.4706, 6612.4706, 6612.4706] +25-08-31 05:06:38 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:06:38 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:06:38 | D | - sum error = [ 6905.3550, 6854.9153, 6812.1676, 6782.8513, 6737.7640] +25-08-31 05:06:38 | D | - best error = [ 6612.4706, 6612.4706, 6612.4706, 6612.4706, 6612.4706] +25-08-31 05:06:38 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:06:38 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:06:38 | D | - sum error = [ 6707.8649, 6686.0162, 6683.5526, 6653.8019, 6652.8177] +25-08-31 05:06:38 | D | - best error = [ 6612.4706, 6612.4706, 6612.4706, 6612.4706, 6612.4706] +25-08-31 05:06:38 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:06:38 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:06:38 | D | - sum error = [ 6654.3496, 6668.5279, 6688.0544, 6720.5872, 6765.6911] +25-08-31 05:06:38 | D | - best error = [ 6612.4706, 6612.4706, 6612.4706, 6612.4706, 6612.4706] +25-08-31 05:06:38 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:06:38 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:06:38 | D | - sum error = [ 6809.3964, 6859.7880, 6943.9643, 7017.1296] +25-08-31 05:06:38 | D | - best error = [ 6612.4706, 6612.4706, 6612.4706, 6612.4706] +25-08-31 05:06:38 | D | + error = 6612.4706 +25-08-31 05:06:38 | D | + scale = [min=0.0432, max=1.9241] +25-08-31 05:06:38 | D | - transformer_blocks.12.ff_context.up_proj +25-08-31 05:06:38 | D | + w: sfp4_e2m1_all +25-08-31 05:06:38 | D | + x: sfp4_e2m1_all +25-08-31 05:06:38 | D | + y: None +25-08-31 05:06:38 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:06:38 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:06:38 | D | + x - AbsMax +25-08-31 05:06:38 | D | + x = [min=0.1338, max=99.0000] +25-08-31 05:06:38 | D | + w - AbsMax +25-08-31 05:06:38 | D | + w = [min=0.1094, max=0.4102] +25-08-31 05:06:38 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:06:39 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:07:21 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:07:21 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:07:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:07:21 | D | - sum error = [ 4977.1532, 4578.9746, 4157.8696, 3806.6321, 3397.2451] +25-08-31 05:07:21 | D | - best error = [ 4977.1532, 4578.9746, 4157.8696, 3806.6321, 3397.2451] +25-08-31 05:07:21 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:07:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:07:21 | D | - sum error = [ 3067.5101, 2744.8882, 2454.3095, 2198.4974, 1948.0366] +25-08-31 05:07:21 | D | - best error = [ 3067.5101, 2744.8882, 2454.3095, 2198.4974, 1948.0366] +25-08-31 05:07:21 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:07:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:07:21 | D | - sum error = [ 1810.0642, 1718.4488, 1669.1952, 1635.2510, 1604.4058] +25-08-31 05:07:21 | D | - best error = [ 1810.0642, 1718.4488, 1669.1952, 1635.2510, 1604.4058] +25-08-31 05:07:21 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:07:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:07:21 | D | - sum error = [ 1584.6173, 1593.8408, 1621.5929, 1641.9064, 1661.9375] +25-08-31 05:07:21 | D | - best error = [ 1584.6173, 1584.6173, 1584.6173, 1584.6173, 1584.6173] +25-08-31 05:07:21 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:07:21 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:07:21 | D | - sum error = [ 5403.9676, 4930.5945, 4495.0568, 4033.2285, 3653.4396] +25-08-31 05:07:21 | D | - best error = [ 1584.6173, 1584.6173, 1584.6173, 1584.6173, 1584.6173] +25-08-31 05:07:21 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:07:21 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:07:21 | D | - sum error = [ 3213.5151, 2876.4656, 2528.6977, 2208.0123, 1967.8812] +25-08-31 05:07:21 | D | - best error = [ 1584.6173, 1584.6173, 1584.6173, 1584.6173, 1584.6173] +25-08-31 05:07:21 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:07:21 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:07:21 | D | - sum error = [ 1824.1106, 1725.0260, 1662.0098, 1624.5355, 1614.8717] +25-08-31 05:07:21 | D | - best error = [ 1584.6173, 1584.6173, 1584.6173, 1584.6173, 1584.6173] +25-08-31 05:07:21 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:07:21 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:07:21 | D | - sum error = [ 1611.7163, 1604.6704, 1626.1433, 1668.6474] +25-08-31 05:07:21 | D | - best error = [ 1584.6173, 1584.6173, 1584.6173, 1584.6173] +25-08-31 05:07:21 | D | + error = 1584.6173 +25-08-31 05:07:21 | D | + scale = [min=0.2212, max=31.3853] +25-08-31 05:07:22 | D | - transformer_blocks.12.ff_context.down_proj +25-08-31 05:07:22 | D | + w: sfp4_e2m1_all +25-08-31 05:07:22 | D | + x: sfp4_e2m1_all +25-08-31 05:07:22 | D | + y: None +25-08-31 05:07:22 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:07:22 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:07:22 | D | + x - AbsMax +25-08-31 05:07:22 | D | + x = [min=0.0000, max=40.5000] +25-08-31 05:07:22 | D | + w - AbsMax +25-08-31 05:07:22 | D | + w = [min=0.0288, max=0.5234] +25-08-31 05:07:22 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:07:23 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:08:11 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:08:11 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:08:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:08:11 | D | - sum error = [ 1450.3506, 1333.2459, 1248.1045, 1176.2768, 1104.5462] +25-08-31 05:08:11 | D | - best error = [ 1450.3506, 1333.2459, 1248.1045, 1176.2768, 1104.5462] +25-08-31 05:08:11 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:08:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:08:11 | D | - sum error = [ 1068.6927, 1024.7164, 982.5482, 950.8893, 935.7829] +25-08-31 05:08:11 | D | - best error = [ 1068.6927, 1024.7164, 982.5482, 950.8893, 935.7829] +25-08-31 05:08:11 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:08:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:08:11 | D | - sum error = [ 920.9754, 920.3569, 918.1232, 895.4133, 909.6778] +25-08-31 05:08:11 | D | - best error = [ 920.9754, 920.3569, 918.1232, 895.4133, 895.4133] +25-08-31 05:08:11 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:08:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:08:11 | D | - sum error = [ 903.2819, 908.6900, 916.4301, 919.5302, 930.5873] +25-08-31 05:08:11 | D | - best error = [ 895.4133, 895.4133, 895.4133, 895.4133, 895.4133] +25-08-31 05:08:11 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:08:11 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:08:11 | D | - sum error = [ 1538.6650, 1350.2336, 1233.3556, 1131.1901, 1076.8394] +25-08-31 05:08:11 | D | - best error = [ 895.4133, 895.4133, 895.4133, 895.4133, 895.4133] +25-08-31 05:08:11 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:08:11 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:08:11 | D | - sum error = [ 1020.6338, 983.0701, 958.8438, 931.2535, 915.4867] +25-08-31 05:08:11 | D | - best error = [ 895.4133, 895.4133, 895.4133, 895.4133, 895.4133] +25-08-31 05:08:11 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:08:11 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:08:11 | D | - sum error = [ 897.6800, 909.1646, 898.0221, 893.3660, 890.7347] +25-08-31 05:08:11 | D | - best error = [ 895.4133, 895.4133, 895.4133, 893.3660, 890.7347] +25-08-31 05:08:11 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:08:11 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:08:11 | D | - sum error = [ 906.6174, 903.1734, 924.2493, 931.3795] +25-08-31 05:08:11 | D | - best error = [ 890.7347, 890.7347, 890.7347, 890.7347] +25-08-31 05:08:11 | D | + error = 890.7347 +25-08-31 05:08:11 | D | + scale = [min=0.0000, max=21.3078] +25-08-31 05:08:30 | D | - Smoothing Diffusion Block transformer_blocks.13 +25-08-31 05:08:30 | D | - Skipping Module transformer_blocks.13.norm1.linear +25-08-31 05:08:30 | D | - Skipping Module transformer_blocks.13.norm1_context.linear +25-08-31 05:08:30 | D | - Smoothing Transformer Block transformer_blocks.13 +25-08-31 05:08:30 | D | - transformer_blocks.13.attn.qkv_proj +25-08-31 05:08:30 | D | + w: sfp4_e2m1_all +25-08-31 05:08:30 | D | + x: sfp4_e2m1_all +25-08-31 05:08:30 | D | + y: None +25-08-31 05:08:30 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:08:30 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:08:31 | D | + x - AbsMax +25-08-31 05:08:31 | D | + x = [min=0.1152, max=17.7500] +25-08-31 05:08:31 | D | + w - AbsMax +25-08-31 05:08:31 | D | + w = [min=0.1074, max=0.8320] +25-08-31 05:08:31 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:08:32 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:09:50 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:09:50 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:09:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:09:50 | D | - sum error = [ 6782.3141, 6714.0095, 6596.8814, 6544.6610, 6527.1626] +25-08-31 05:09:50 | D | - best error = [ 6782.3141, 6714.0095, 6596.8814, 6544.6610, 6527.1626] +25-08-31 05:09:50 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:09:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:09:50 | D | - sum error = [ 6479.4586, 6425.0921, 6369.1579, 6340.1009, 6332.8278] +25-08-31 05:09:50 | D | - best error = [ 6479.4586, 6425.0921, 6369.1579, 6340.1009, 6332.8278] +25-08-31 05:09:50 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:09:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:09:50 | D | - sum error = [ 6296.0318, 6258.8200, 6260.0466, 6259.2683, 6320.8199] +25-08-31 05:09:50 | D | - best error = [ 6296.0318, 6258.8200, 6258.8200, 6258.8200, 6258.8200] +25-08-31 05:09:50 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:09:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:09:50 | D | - sum error = [ 6314.2515, 6364.0886, 6420.0336, 6455.3599, 6497.8415] +25-08-31 05:09:50 | D | - best error = [ 6258.8200, 6258.8200, 6258.8200, 6258.8200, 6258.8200] +25-08-31 05:09:50 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:09:50 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:09:50 | D | - sum error = [ 9675.5039, 9022.9667, 8590.6512, 8237.6885, 7953.5295] +25-08-31 05:09:50 | D | - best error = [ 6258.8200, 6258.8200, 6258.8200, 6258.8200, 6258.8200] +25-08-31 05:09:50 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:09:50 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:09:50 | D | - sum error = [ 7519.7266, 7246.0963, 7156.3326, 6992.1136, 6827.5369] +25-08-31 05:09:50 | D | - best error = [ 6258.8200, 6258.8200, 6258.8200, 6258.8200, 6258.8200] +25-08-31 05:09:50 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:09:50 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:09:50 | D | - sum error = [ 6719.3009, 6635.9087, 6575.2338, 6483.4346, 6503.8180] +25-08-31 05:09:50 | D | - best error = [ 6258.8200, 6258.8200, 6258.8200, 6258.8200, 6258.8200] +25-08-31 05:09:50 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:09:50 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:09:50 | D | - sum error = [ 6427.7940, 6468.9919, 6477.1293, 6487.7580] +25-08-31 05:09:50 | D | - best error = [ 6258.8200, 6258.8200, 6258.8200, 6258.8200] +25-08-31 05:09:50 | D | + error = 6258.8200 +25-08-31 05:09:50 | D | + scale = [min=0.3047, max=4.8647] +25-08-31 05:09:50 | D | - transformer_blocks.13.attn add_qkv_proj +25-08-31 05:09:50 | D | + w: sfp4_e2m1_all +25-08-31 05:09:50 | D | + x: sfp4_e2m1_all +25-08-31 05:09:50 | D | + y: None +25-08-31 05:09:50 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:09:50 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:09:50 | D | + x - AbsMax +25-08-31 05:09:50 | D | + x = [min=0.1245, max=33.0000] +25-08-31 05:09:50 | D | + w - AbsMax +25-08-31 05:09:50 | D | + w = [min=0.1235, max=0.5781] +25-08-31 05:09:50 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:09:51 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:10:59 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:10:59 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:10:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:10:59 | D | - sum error = [ 1062.9863, 1034.2998, 1019.2473, 1016.8624, 968.7351] +25-08-31 05:10:59 | D | - best error = [ 1062.9863, 1034.2998, 1019.2473, 1016.8624, 968.7351] +25-08-31 05:10:59 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:10:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:10:59 | D | - sum error = [ 970.8832, 969.8910, 938.9658, 951.2142, 934.5027] +25-08-31 05:10:59 | D | - best error = [ 968.7351, 968.7351, 938.9658, 938.9658, 934.5027] +25-08-31 05:10:59 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:10:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:10:59 | D | - sum error = [ 940.2387, 928.9847, 929.0900, 935.6796, 931.9422] +25-08-31 05:10:59 | D | - best error = [ 934.5027, 928.9847, 928.9847, 928.9847, 928.9847] +25-08-31 05:10:59 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:10:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:10:59 | D | - sum error = [ 945.1448, 945.0655, 953.3793, 1014.6365, 984.2671] +25-08-31 05:10:59 | D | - best error = [ 928.9847, 928.9847, 928.9847, 928.9847, 928.9847] +25-08-31 05:10:59 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:10:59 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:10:59 | D | - sum error = [ 1336.7737, 1278.7013, 1202.3396, 1189.3069, 1155.5657] +25-08-31 05:10:59 | D | - best error = [ 928.9847, 928.9847, 928.9847, 928.9847, 928.9847] +25-08-31 05:10:59 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:10:59 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:10:59 | D | - sum error = [ 1091.5324, 1043.2564, 1055.1544, 998.0317, 971.1234] +25-08-31 05:10:59 | D | - best error = [ 928.9847, 928.9847, 928.9847, 928.9847, 928.9847] +25-08-31 05:10:59 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:10:59 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:10:59 | D | - sum error = [ 956.1740, 949.6311, 963.0783, 956.1814, 951.1378] +25-08-31 05:10:59 | D | - best error = [ 928.9847, 928.9847, 928.9847, 928.9847, 928.9847] +25-08-31 05:10:59 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:10:59 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:10:59 | D | - sum error = [ 943.0630, 973.0771, 968.6829, 991.7514] +25-08-31 05:10:59 | D | - best error = [ 928.9847, 928.9847, 928.9847, 928.9847] +25-08-31 05:10:59 | D | + error = 928.9847 +25-08-31 05:10:59 | D | + scale = [min=0.3180, max=6.8420] +25-08-31 05:10:59 | D | - transformer_blocks.13.attn.out_proj + transformer_blocks.13.attn.add_out_proj +25-08-31 05:10:59 | D | + w: sfp4_e2m1_all +25-08-31 05:10:59 | D | + x: sfp4_e2m1_all +25-08-31 05:10:59 | D | + y: None +25-08-31 05:10:59 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:10:59 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:11:00 | D | + x - AbsMax +25-08-31 05:11:00 | D | + x = [min=3.3125, max=16.1250] +25-08-31 05:11:00 | D | + w - AbsMax +25-08-31 05:11:00 | D | + w = [min=0.1289, max=0.3145] +25-08-31 05:11:00 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:11:01 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:12:50 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:12:50 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:12:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:12:50 | D | - sum error = [10282.4285, 10269.5371, 10261.2030, 10254.8599, 10240.7074] +25-08-31 05:12:50 | D | - best error = [10282.4285, 10269.5371, 10261.2030, 10254.8599, 10240.7074] +25-08-31 05:12:50 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:12:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:12:50 | D | - sum error = [10224.3220, 10242.9664, 10200.1973, 10207.3142, 10204.0031] +25-08-31 05:12:50 | D | - best error = [10224.3220, 10224.3220, 10200.1973, 10200.1973, 10200.1973] +25-08-31 05:12:50 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:12:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:12:50 | D | - sum error = [10226.7751, 10188.9407, 10213.7476, 10231.9382, 10244.9355] +25-08-31 05:12:50 | D | - best error = [10200.1973, 10188.9407, 10188.9407, 10188.9407, 10188.9407] +25-08-31 05:12:50 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:12:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:12:50 | D | - sum error = [10249.6470, 10272.8169, 10270.8878, 10297.7679, 10286.3790] +25-08-31 05:12:50 | D | - best error = [10188.9407, 10188.9407, 10188.9407, 10188.9407, 10188.9407] +25-08-31 05:12:50 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:12:50 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:12:50 | D | - sum error = [10437.3315, 10392.3256, 10372.6756, 10349.5275, 10365.5151] +25-08-31 05:12:50 | D | - best error = [10188.9407, 10188.9407, 10188.9407, 10188.9407, 10188.9407] +25-08-31 05:12:50 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:12:50 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:12:50 | D | - sum error = [10323.4370, 10313.2334, 10309.3740, 10268.3599, 10265.1391] +25-08-31 05:12:50 | D | - best error = [10188.9407, 10188.9407, 10188.9407, 10188.9407, 10188.9407] +25-08-31 05:12:50 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:12:50 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:12:50 | D | - sum error = [10240.7703, 10267.6432, 10273.2791, 10260.6857, 10254.1086] +25-08-31 05:12:50 | D | - best error = [10188.9407, 10188.9407, 10188.9407, 10188.9407, 10188.9407] +25-08-31 05:12:50 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:12:50 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:12:50 | D | - sum error = [10278.4575, 10280.6786, 10284.9572, 10277.8535] +25-08-31 05:12:50 | D | - best error = [10188.9407, 10188.9407, 10188.9407, 10188.9407] +25-08-31 05:12:50 | D | + error = 10188.9407 +25-08-31 05:12:50 | D | + scale = [min=1.9323, max=4.6145] +25-08-31 05:12:50 | D | - transformer_blocks.13.ff.up_proj +25-08-31 05:12:50 | D | + w: sfp4_e2m1_all +25-08-31 05:12:50 | D | + x: sfp4_e2m1_all +25-08-31 05:12:50 | D | + y: None +25-08-31 05:12:50 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:12:50 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:12:51 | D | + x - AbsMax +25-08-31 05:12:51 | D | + x = [min=0.0952, max=7.8125] +25-08-31 05:12:51 | D | + w - AbsMax +25-08-31 05:12:51 | D | + w = [min=0.1167, max=0.5820] +25-08-31 05:12:51 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:12:52 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:14:09 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:14:09 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:14:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:14:09 | D | - sum error = [ 8107.3039, 8058.1462, 8029.2560, 7988.8022, 7964.2120] +25-08-31 05:14:09 | D | - best error = [ 8107.3039, 8058.1462, 8029.2560, 7988.8022, 7964.2120] +25-08-31 05:14:09 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:14:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:14:09 | D | - sum error = [ 7945.2081, 7918.3296, 7898.6520, 7889.0978, 7882.0284] +25-08-31 05:14:09 | D | - best error = [ 7945.2081, 7918.3296, 7898.6520, 7889.0978, 7882.0284] +25-08-31 05:14:09 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:14:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:14:09 | D | - sum error = [ 7883.2635, 7881.9518, 7878.2041, 7904.5825, 7916.8294] +25-08-31 05:14:09 | D | - best error = [ 7882.0284, 7881.9518, 7878.2041, 7878.2041, 7878.2041] +25-08-31 05:14:09 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:14:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:14:09 | D | - sum error = [ 7941.4709, 7966.5157, 7983.8672, 8013.5039, 8042.4566] +25-08-31 05:14:09 | D | - best error = [ 7878.2041, 7878.2041, 7878.2041, 7878.2041, 7878.2041] +25-08-31 05:14:09 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:14:09 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:14:09 | D | - sum error = [ 8364.6906, 8281.4139, 8203.0911, 8156.6106, 8098.5223] +25-08-31 05:14:09 | D | - best error = [ 7878.2041, 7878.2041, 7878.2041, 7878.2041, 7878.2041] +25-08-31 05:14:09 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:14:09 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:14:09 | D | - sum error = [ 8049.3006, 8009.8609, 7974.0620, 7951.0548, 7944.7074] +25-08-31 05:14:09 | D | - best error = [ 7878.2041, 7878.2041, 7878.2041, 7878.2041, 7878.2041] +25-08-31 05:14:09 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:14:09 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:14:09 | D | - sum error = [ 7930.3581, 7913.0336, 7919.1846, 7927.6857, 7943.2605] +25-08-31 05:14:09 | D | - best error = [ 7878.2041, 7878.2041, 7878.2041, 7878.2041, 7878.2041] +25-08-31 05:14:09 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:14:09 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:14:09 | D | - sum error = [ 7965.0446, 7987.5483, 8015.9117, 8039.0975] +25-08-31 05:14:09 | D | - best error = [ 7878.2041, 7878.2041, 7878.2041, 7878.2041] +25-08-31 05:14:09 | D | + error = 7878.2041 +25-08-31 05:14:09 | D | + scale = [min=0.2439, max=3.4330] +25-08-31 05:14:10 | D | - transformer_blocks.13.ff.down_proj +25-08-31 05:14:10 | D | + w: sfp4_e2m1_all +25-08-31 05:14:10 | D | + x: sfp4_e2m1_all +25-08-31 05:14:10 | D | + y: None +25-08-31 05:14:10 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:14:10 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:14:12 | D | + x - AbsMax +25-08-31 05:14:12 | D | + x = [min=0.0000, max=18.1250] +25-08-31 05:14:12 | D | + w - AbsMax +25-08-31 05:14:12 | D | + w = [min=0.0757, max=1.0156] +25-08-31 05:14:12 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:14:14 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:16:09 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:16:09 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:16:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:16:09 | D | - sum error = [ 6899.7443, 6893.4731, 6919.5318, 6905.9522, 6898.5227] +25-08-31 05:16:09 | D | - best error = [ 6899.7443, 6893.4731, 6893.4731, 6893.4731, 6893.4731] +25-08-31 05:16:09 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:16:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:16:09 | D | - sum error = [ 6906.3947, 6904.8490, 6929.4718, 6935.2537, 6965.0077] +25-08-31 05:16:09 | D | - best error = [ 6893.4731, 6893.4731, 6893.4731, 6893.4731, 6893.4731] +25-08-31 05:16:09 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:16:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:16:09 | D | - sum error = [ 6982.5150, 6999.1908, 7032.8842, 7057.3301, 7103.1844] +25-08-31 05:16:09 | D | - best error = [ 6893.4731, 6893.4731, 6893.4731, 6893.4731, 6893.4731] +25-08-31 05:16:09 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:16:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:16:09 | D | - sum error = [ 7145.5648, 7201.8928, 7238.8483, 7297.9937, 7368.3222] +25-08-31 05:16:09 | D | - best error = [ 6893.4731, 6893.4731, 6893.4731, 6893.4731, 6893.4731] +25-08-31 05:16:09 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:16:09 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:16:09 | D | - sum error = [ 7571.7087, 7470.1336, 7385.8513, 7311.8247, 7249.3045] +25-08-31 05:16:09 | D | - best error = [ 6893.4731, 6893.4731, 6893.4731, 6893.4731, 6893.4731] +25-08-31 05:16:09 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:16:09 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:16:09 | D | - sum error = [ 7187.7023, 7138.0328, 7094.2771, 7061.1183, 7038.3450] +25-08-31 05:16:09 | D | - best error = [ 6893.4731, 6893.4731, 6893.4731, 6893.4731, 6893.4731] +25-08-31 05:16:09 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:16:09 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:16:09 | D | - sum error = [ 7024.8695, 7030.2532, 7029.3015, 7071.4576, 7091.8271] +25-08-31 05:16:09 | D | - best error = [ 6893.4731, 6893.4731, 6893.4731, 6893.4731, 6893.4731] +25-08-31 05:16:09 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:16:09 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:16:09 | D | - sum error = [ 7133.9902, 7201.2841, 7252.3607, 7343.8747] +25-08-31 05:16:09 | D | - best error = [ 6893.4731, 6893.4731, 6893.4731, 6893.4731] +25-08-31 05:16:09 | D | + error = 6893.4731 +25-08-31 05:16:09 | D | + scale = [min=0.5043, max=1.1559] +25-08-31 05:16:09 | D | - transformer_blocks.13.ff_context.up_proj +25-08-31 05:16:09 | D | + w: sfp4_e2m1_all +25-08-31 05:16:09 | D | + x: sfp4_e2m1_all +25-08-31 05:16:09 | D | + y: None +25-08-31 05:16:09 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:16:09 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:16:09 | D | + x - AbsMax +25-08-31 05:16:09 | D | + x = [min=0.1426, max=152.0000] +25-08-31 05:16:09 | D | + w - AbsMax +25-08-31 05:16:09 | D | + w = [min=0.0977, max=0.3672] +25-08-31 05:16:09 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:16:09 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:16:52 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:16:52 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:16:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:16:52 | D | - sum error = [ 7208.2927, 6610.0221, 5770.2293, 5242.6791, 4579.2360] +25-08-31 05:16:52 | D | - best error = [ 7208.2927, 6610.0221, 5770.2293, 5242.6791, 4579.2360] +25-08-31 05:16:52 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:16:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:16:52 | D | - sum error = [ 3986.0855, 3535.0861, 3100.7876, 2712.8671, 2458.9920] +25-08-31 05:16:52 | D | - best error = [ 3986.0855, 3535.0861, 3100.7876, 2712.8671, 2458.9920] +25-08-31 05:16:52 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:16:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:16:52 | D | - sum error = [ 2300.0205, 2192.6137, 2092.5671, 2039.0967, 2018.9781] +25-08-31 05:16:52 | D | - best error = [ 2300.0205, 2192.6137, 2092.5671, 2039.0967, 2018.9781] +25-08-31 05:16:52 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:16:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:16:52 | D | - sum error = [ 1997.7572, 2001.9111, 2028.5063, 2054.0829, 2085.2095] +25-08-31 05:16:52 | D | - best error = [ 1997.7572, 1997.7572, 1997.7572, 1997.7572, 1997.7572] +25-08-31 05:16:52 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:16:52 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:16:52 | D | - sum error = [ 6195.3909, 5526.9612, 4808.1936, 4145.7057, 3679.3529] +25-08-31 05:16:52 | D | - best error = [ 1997.7572, 1997.7572, 1997.7572, 1997.7572, 1997.7572] +25-08-31 05:16:52 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:16:52 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:16:52 | D | - sum error = [ 3263.9605, 2853.0702, 2569.8286, 2392.9527, 2271.0219] +25-08-31 05:16:52 | D | - best error = [ 1997.7572, 1997.7572, 1997.7572, 1997.7572, 1997.7572] +25-08-31 05:16:52 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:16:52 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:16:52 | D | - sum error = [ 2157.1831, 2068.0549, 2024.6729, 2000.3099, 1992.2338] +25-08-31 05:16:52 | D | - best error = [ 1997.7572, 1997.7572, 1997.7572, 1997.7572, 1992.2338] +25-08-31 05:16:52 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:16:52 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:16:52 | D | - sum error = [ 2002.2480, 2009.7852, 2033.4664, 2085.6644] +25-08-31 05:16:52 | D | - best error = [ 1992.2338, 1992.2338, 1992.2338, 1992.2338] +25-08-31 05:16:52 | D | + error = 1992.2338 +25-08-31 05:16:52 | D | + scale = [min=0.3775, max=70.0908] +25-08-31 05:16:52 | D | - transformer_blocks.13.ff_context.down_proj +25-08-31 05:16:52 | D | + w: sfp4_e2m1_all +25-08-31 05:16:52 | D | + x: sfp4_e2m1_all +25-08-31 05:16:52 | D | + y: None +25-08-31 05:16:52 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:16:52 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:16:53 | D | + x - AbsMax +25-08-31 05:16:53 | D | + x = [min=0.0000, max=20.5000] +25-08-31 05:16:53 | D | + w - AbsMax +25-08-31 05:16:53 | D | + w = [min=0.0208, max=0.4688] +25-08-31 05:16:53 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:16:53 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:17:42 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:17:42 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:17:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:17:42 | D | - sum error = [ 574.4239, 520.2211, 488.1199, 452.6350, 429.7901] +25-08-31 05:17:42 | D | - best error = [ 574.4239, 520.2211, 488.1199, 452.6350, 429.7901] +25-08-31 05:17:42 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:17:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:17:42 | D | - sum error = [ 411.6846, 394.5251, 379.9829, 369.1035, 352.1070] +25-08-31 05:17:42 | D | - best error = [ 411.6846, 394.5251, 379.9829, 369.1035, 352.1070] +25-08-31 05:17:42 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:17:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:17:42 | D | - sum error = [ 347.5056, 344.7311, 342.5395, 337.5695, 329.8974] +25-08-31 05:17:42 | D | - best error = [ 347.5056, 344.7311, 342.5395, 337.5695, 329.8974] +25-08-31 05:17:42 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:17:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:17:42 | D | - sum error = [ 327.5837, 330.4871, 331.5413, 338.6937, 339.1935] +25-08-31 05:17:42 | D | - best error = [ 327.5837, 327.5837, 327.5837, 327.5837, 327.5837] +25-08-31 05:17:42 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:17:42 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:17:42 | D | - sum error = [ 589.9770, 529.5626, 488.4054, 455.5895, 426.5814] +25-08-31 05:17:42 | D | - best error = [ 327.5837, 327.5837, 327.5837, 327.5837, 327.5837] +25-08-31 05:17:42 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:17:42 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:17:42 | D | - sum error = [ 409.1668, 395.7908, 378.0587, 370.9731, 363.0071] +25-08-31 05:17:42 | D | - best error = [ 327.5837, 327.5837, 327.5837, 327.5837, 327.5837] +25-08-31 05:17:42 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:17:42 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:17:42 | D | - sum error = [ 356.4865, 350.3894, 347.2203, 343.9018, 341.5962] +25-08-31 05:17:42 | D | - best error = [ 327.5837, 327.5837, 327.5837, 327.5837, 327.5837] +25-08-31 05:17:42 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:17:42 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:17:42 | D | - sum error = [ 341.4676, 341.3490, 343.1112, 339.2152] +25-08-31 05:17:42 | D | - best error = [ 327.5837, 327.5837, 327.5837, 327.5837] +25-08-31 05:17:42 | D | + error = 327.5837 +25-08-31 05:17:42 | D | + scale = [min=0.0000, max=9.6342] +25-08-31 05:18:03 | D | - Smoothing Diffusion Block transformer_blocks.14 +25-08-31 05:18:03 | D | - Skipping Module transformer_blocks.14.norm1.linear +25-08-31 05:18:03 | D | - Skipping Module transformer_blocks.14.norm1_context.linear +25-08-31 05:18:03 | D | - Smoothing Transformer Block transformer_blocks.14 +25-08-31 05:18:03 | D | - transformer_blocks.14.attn.qkv_proj +25-08-31 05:18:03 | D | + w: sfp4_e2m1_all +25-08-31 05:18:03 | D | + x: sfp4_e2m1_all +25-08-31 05:18:03 | D | + y: None +25-08-31 05:18:03 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:18:03 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:18:04 | D | + x - AbsMax +25-08-31 05:18:04 | D | + x = [min=0.1187, max=25.1250] +25-08-31 05:18:04 | D | + w - AbsMax +25-08-31 05:18:04 | D | + w = [min=0.1147, max=0.7305] +25-08-31 05:18:04 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:18:05 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:19:18 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:19:18 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:19:18 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:19:18 | D | - sum error = [ 6326.5027, 6104.8882, 5980.5351, 5895.7642, 5833.5385] +25-08-31 05:19:18 | D | - best error = [ 6326.5027, 6104.8882, 5980.5351, 5895.7642, 5833.5385] +25-08-31 05:19:18 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:19:18 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:19:18 | D | - sum error = [ 5729.9522, 5696.1886, 5658.3888, 5585.9296, 5531.7946] +25-08-31 05:19:18 | D | - best error = [ 5729.9522, 5696.1886, 5658.3888, 5585.9296, 5531.7946] +25-08-31 05:19:18 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:19:18 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:19:18 | D | - sum error = [ 5566.1159, 5393.4961, 5383.8561, 5464.1539, 5426.9266] +25-08-31 05:19:18 | D | - best error = [ 5531.7946, 5393.4961, 5383.8561, 5383.8561, 5383.8561] +25-08-31 05:19:18 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:19:18 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:19:18 | D | - sum error = [ 5466.8995, 5439.3810, 5469.3894, 5491.8334, 5534.6257] +25-08-31 05:19:18 | D | - best error = [ 5383.8561, 5383.8561, 5383.8561, 5383.8561, 5383.8561] +25-08-31 05:19:18 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:19:18 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:19:18 | D | - sum error = [10918.4336, 9736.1886, 8976.5704, 8231.1341, 7710.1549] +25-08-31 05:19:18 | D | - best error = [ 5383.8561, 5383.8561, 5383.8561, 5383.8561, 5383.8561] +25-08-31 05:19:18 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:19:18 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:19:18 | D | - sum error = [ 7209.9229, 6972.5203, 6708.1789, 6356.3955, 6243.3501] +25-08-31 05:19:18 | D | - best error = [ 5383.8561, 5383.8561, 5383.8561, 5383.8561, 5383.8561] +25-08-31 05:19:18 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:19:18 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:19:18 | D | - sum error = [ 6130.0538, 5984.4826, 5716.7396, 5679.9048, 5667.8396] +25-08-31 05:19:18 | D | - best error = [ 5383.8561, 5383.8561, 5383.8561, 5383.8561, 5383.8561] +25-08-31 05:19:18 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:19:18 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:19:18 | D | - sum error = [ 5603.5783, 5543.0791, 5542.1900, 5565.8480] +25-08-31 05:19:18 | D | - best error = [ 5383.8561, 5383.8561, 5383.8561, 5383.8561] +25-08-31 05:19:18 | D | + error = 5383.8561 +25-08-31 05:19:18 | D | + scale = [min=0.2783, max=6.9193] +25-08-31 05:19:19 | D | - transformer_blocks.14.attn add_qkv_proj +25-08-31 05:19:19 | D | + w: sfp4_e2m1_all +25-08-31 05:19:19 | D | + x: sfp4_e2m1_all +25-08-31 05:19:19 | D | + y: None +25-08-31 05:19:19 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:19:19 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:19:19 | D | + x - AbsMax +25-08-31 05:19:19 | D | + x = [min=0.1514, max=18.1250] +25-08-31 05:19:19 | D | + w - AbsMax +25-08-31 05:19:19 | D | + w = [min=0.1152, max=0.4961] +25-08-31 05:19:19 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:19:20 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:20:23 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:20:23 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:20:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:20:23 | D | - sum error = [ 1210.4203, 1200.5955, 1176.0784, 1164.3543, 1166.8900] +25-08-31 05:20:23 | D | - best error = [ 1210.4203, 1200.5955, 1176.0784, 1164.3543, 1164.3543] +25-08-31 05:20:23 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:20:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:20:23 | D | - sum error = [ 1130.1244, 1100.5215, 1096.4787, 1082.5077, 1080.8217] +25-08-31 05:20:23 | D | - best error = [ 1130.1244, 1100.5215, 1096.4787, 1082.5077, 1080.8217] +25-08-31 05:20:23 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:20:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:20:23 | D | - sum error = [ 1073.3972, 1069.5022, 1075.5156, 1068.8823, 1076.3955] +25-08-31 05:20:23 | D | - best error = [ 1073.3972, 1069.5022, 1069.5022, 1068.8823, 1068.8823] +25-08-31 05:20:23 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:20:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:20:23 | D | - sum error = [ 1120.3816, 1135.5637, 1139.7283, 1130.6709, 1168.0348] +25-08-31 05:20:23 | D | - best error = [ 1068.8823, 1068.8823, 1068.8823, 1068.8823, 1068.8823] +25-08-31 05:20:23 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:20:23 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:20:23 | D | - sum error = [ 1469.0313, 1401.0398, 1385.6654, 1328.3499, 1277.5980] +25-08-31 05:20:23 | D | - best error = [ 1068.8823, 1068.8823, 1068.8823, 1068.8823, 1068.8823] +25-08-31 05:20:23 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:20:23 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:20:23 | D | - sum error = [ 1270.8107, 1227.6770, 1180.4212, 1156.7258, 1159.3656] +25-08-31 05:20:23 | D | - best error = [ 1068.8823, 1068.8823, 1068.8823, 1068.8823, 1068.8823] +25-08-31 05:20:23 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:20:23 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:20:23 | D | - sum error = [ 1120.6659, 1133.6698, 1098.9324, 1118.9605, 1129.7145] +25-08-31 05:20:23 | D | - best error = [ 1068.8823, 1068.8823, 1068.8823, 1068.8823, 1068.8823] +25-08-31 05:20:23 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:20:23 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:20:23 | D | - sum error = [ 1140.3110, 1152.2351, 1133.0351, 1149.8149] +25-08-31 05:20:23 | D | - best error = [ 1068.8823, 1068.8823, 1068.8823, 1068.8823] +25-08-31 05:20:23 | D | + error = 1068.8823 +25-08-31 05:20:23 | D | + scale = [min=0.2931, max=6.5748] +25-08-31 05:20:24 | D | - transformer_blocks.14.attn.out_proj + transformer_blocks.14.attn.add_out_proj +25-08-31 05:20:24 | D | + w: sfp4_e2m1_all +25-08-31 05:20:24 | D | + x: sfp4_e2m1_all +25-08-31 05:20:24 | D | + y: None +25-08-31 05:20:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:20:24 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:20:24 | D | + x - AbsMax +25-08-31 05:20:24 | D | + x = [min=3.1875, max=31.3750] +25-08-31 05:20:24 | D | + w - AbsMax +25-08-31 05:20:24 | D | + w = [min=0.1182, max=0.4102] +25-08-31 05:20:24 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:20:26 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:22:11 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:22:11 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:22:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:22:11 | D | - sum error = [ 8706.7101, 8729.0618, 8739.6815, 8722.5863, 8753.5191] +25-08-31 05:22:11 | D | - best error = [ 8706.7101, 8706.7101, 8706.7101, 8706.7101, 8706.7101] +25-08-31 05:22:11 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:22:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:22:11 | D | - sum error = [ 8763.4833, 8735.5485, 8738.7857, 8746.4635, 8742.3640] +25-08-31 05:22:11 | D | - best error = [ 8706.7101, 8706.7101, 8706.7101, 8706.7101, 8706.7101] +25-08-31 05:22:11 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:22:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:22:11 | D | - sum error = [ 8762.1755, 8779.3845, 8815.2028, 8835.5174, 8843.9584] +25-08-31 05:22:11 | D | - best error = [ 8706.7101, 8706.7101, 8706.7101, 8706.7101, 8706.7101] +25-08-31 05:22:11 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:22:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:22:11 | D | - sum error = [ 8860.3318, 8865.6903, 8890.4511, 8920.5865, 8938.5918] +25-08-31 05:22:11 | D | - best error = [ 8706.7101, 8706.7101, 8706.7101, 8706.7101, 8706.7101] +25-08-31 05:22:11 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:22:11 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:22:11 | D | - sum error = [ 8834.5952, 8816.8185, 8776.1149, 8793.5699, 8780.9840] +25-08-31 05:22:11 | D | - best error = [ 8706.7101, 8706.7101, 8706.7101, 8706.7101, 8706.7101] +25-08-31 05:22:11 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:22:11 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:22:11 | D | - sum error = [ 8796.4084, 8780.9763, 8786.0458, 8780.3745, 8797.4805] +25-08-31 05:22:11 | D | - best error = [ 8706.7101, 8706.7101, 8706.7101, 8706.7101, 8706.7101] +25-08-31 05:22:11 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:22:11 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:22:11 | D | - sum error = [ 8802.1725, 8859.2609, 8843.2623, 8851.3547, 8842.8900] +25-08-31 05:22:11 | D | - best error = [ 8706.7101, 8706.7101, 8706.7101, 8706.7101, 8706.7101] +25-08-31 05:22:11 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:22:11 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:22:11 | D | - sum error = [ 8882.1250, 8899.3492, 8909.1593, 8921.5357] +25-08-31 05:22:11 | D | - best error = [ 8706.7101, 8706.7101, 8706.7101, 8706.7101] +25-08-31 05:22:11 | D | + error = 8706.7101 +25-08-31 05:22:11 | D | + scale = [min=1.0000, max=1.0000] +25-08-31 05:22:12 | D | - transformer_blocks.14.ff.up_proj +25-08-31 05:22:12 | D | + w: sfp4_e2m1_all +25-08-31 05:22:12 | D | + x: sfp4_e2m1_all +25-08-31 05:22:12 | D | + y: None +25-08-31 05:22:12 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:22:12 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:22:12 | D | + x - AbsMax +25-08-31 05:22:12 | D | + x = [min=0.1445, max=6.2188] +25-08-31 05:22:12 | D | + w - AbsMax +25-08-31 05:22:12 | D | + w = [min=0.1113, max=0.5312] +25-08-31 05:22:12 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:22:14 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:23:30 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:23:30 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:23:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:23:30 | D | - sum error = [ 7791.6417, 7753.3422, 7719.6218, 7703.0191, 7672.0222] +25-08-31 05:23:30 | D | - best error = [ 7791.6417, 7753.3422, 7719.6218, 7703.0191, 7672.0222] +25-08-31 05:23:30 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:23:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:23:30 | D | - sum error = [ 7652.5920, 7636.6579, 7619.8772, 7616.5961, 7602.5527] +25-08-31 05:23:30 | D | - best error = [ 7652.5920, 7636.6579, 7619.8772, 7616.5961, 7602.5527] +25-08-31 05:23:30 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:23:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:23:30 | D | - sum error = [ 7594.4237, 7597.0049, 7610.9842, 7625.1605, 7617.4484] +25-08-31 05:23:30 | D | - best error = [ 7594.4237, 7594.4237, 7594.4237, 7594.4237, 7594.4237] +25-08-31 05:23:30 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:23:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:23:30 | D | - sum error = [ 7634.4110, 7656.4425, 7672.4927, 7693.6228, 7727.7241] +25-08-31 05:23:30 | D | - best error = [ 7594.4237, 7594.4237, 7594.4237, 7594.4237, 7594.4237] +25-08-31 05:23:30 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:23:30 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:23:30 | D | - sum error = [ 7937.3066, 7869.2114, 7838.9211, 7775.0977, 7738.9061] +25-08-31 05:23:30 | D | - best error = [ 7594.4237, 7594.4237, 7594.4237, 7594.4237, 7594.4237] +25-08-31 05:23:30 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:23:30 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:23:30 | D | - sum error = [ 7707.0692, 7680.4237, 7670.1661, 7634.9318, 7630.7887] +25-08-31 05:23:30 | D | - best error = [ 7594.4237, 7594.4237, 7594.4237, 7594.4237, 7594.4237] +25-08-31 05:23:30 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:23:30 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:23:30 | D | - sum error = [ 7624.8969, 7629.9172, 7620.7984, 7620.5470, 7630.5968] +25-08-31 05:23:30 | D | - best error = [ 7594.4237, 7594.4237, 7594.4237, 7594.4237, 7594.4237] +25-08-31 05:23:30 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:23:30 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:23:30 | D | - sum error = [ 7647.9529, 7664.6273, 7704.3276, 7727.2047] +25-08-31 05:23:30 | D | - best error = [ 7594.4237, 7594.4237, 7594.4237, 7594.4237] +25-08-31 05:23:30 | D | + error = 7594.4237 +25-08-31 05:23:30 | D | + scale = [min=0.3802, max=2.4937] +25-08-31 05:23:30 | D | - transformer_blocks.14.ff.down_proj +25-08-31 05:23:30 | D | + w: sfp4_e2m1_all +25-08-31 05:23:30 | D | + x: sfp4_e2m1_all +25-08-31 05:23:30 | D | + y: None +25-08-31 05:23:30 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:23:30 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:23:32 | D | + x - AbsMax +25-08-31 05:23:32 | D | + x = [min=0.0000, max=15.3750] +25-08-31 05:23:32 | D | + w - AbsMax +25-08-31 05:23:32 | D | + w = [min=0.0806, max=0.7305] +25-08-31 05:23:32 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:23:34 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:25:23 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:25:23 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:25:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:25:23 | D | - sum error = [ 6852.1944, 6848.9766, 6851.8973, 6833.4431, 6846.3491] +25-08-31 05:25:23 | D | - best error = [ 6852.1944, 6848.9766, 6848.9766, 6833.4431, 6833.4431] +25-08-31 05:25:23 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:25:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:25:23 | D | - sum error = [ 6845.9521, 6858.1347, 6988.7930, 6895.1125, 6904.6471] +25-08-31 05:25:23 | D | - best error = [ 6833.4431, 6833.4431, 6833.4431, 6833.4431, 6833.4431] +25-08-31 05:25:23 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:25:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:25:23 | D | - sum error = [ 6929.1154, 6948.9386, 6976.5599, 7021.0276, 7061.5061] +25-08-31 05:25:23 | D | - best error = [ 6833.4431, 6833.4431, 6833.4431, 6833.4431, 6833.4431] +25-08-31 05:25:23 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:25:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:25:23 | D | - sum error = [ 7095.0707, 7217.5054, 7190.0300, 7243.2233, 7303.4072] +25-08-31 05:25:23 | D | - best error = [ 6833.4431, 6833.4431, 6833.4431, 6833.4431, 6833.4431] +25-08-31 05:25:23 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:25:23 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:25:23 | D | - sum error = [ 7271.9695, 7216.7414, 7159.4838, 7101.3503, 7063.3369] +25-08-31 05:25:23 | D | - best error = [ 6833.4431, 6833.4431, 6833.4431, 6833.4431, 6833.4431] +25-08-31 05:25:23 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:25:23 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:25:23 | D | - sum error = [ 7036.4966, 6999.0571, 6969.2926, 6944.7491, 6944.1714] +25-08-31 05:25:23 | D | - best error = [ 6833.4431, 6833.4431, 6833.4431, 6833.4431, 6833.4431] +25-08-31 05:25:23 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:25:23 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:25:23 | D | - sum error = [ 6940.4435, 6953.1308, 6956.1478, 6995.5460, 7037.1400] +25-08-31 05:25:23 | D | - best error = [ 6833.4431, 6833.4431, 6833.4431, 6833.4431, 6833.4431] +25-08-31 05:25:23 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:25:23 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:25:23 | D | - sum error = [ 7072.4290, 7125.5633, 7187.6561, 7280.1887] +25-08-31 05:25:23 | D | - best error = [ 6833.4431, 6833.4431, 6833.4431, 6833.4431] +25-08-31 05:25:23 | D | + error = 6833.4431 +25-08-31 05:25:23 | D | + scale = [min=0.1050, max=1.5067] +25-08-31 05:25:24 | D | - transformer_blocks.14.ff_context.up_proj +25-08-31 05:25:24 | D | + w: sfp4_e2m1_all +25-08-31 05:25:24 | D | + x: sfp4_e2m1_all +25-08-31 05:25:24 | D | + y: None +25-08-31 05:25:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:25:24 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:25:24 | D | + x - AbsMax +25-08-31 05:25:24 | D | + x = [min=0.1484, max=53.5000] +25-08-31 05:25:24 | D | + w - AbsMax +25-08-31 05:25:24 | D | + w = [min=0.1099, max=0.4473] +25-08-31 05:25:24 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:25:24 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:26:07 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:26:07 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:26:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:26:07 | D | - sum error = [ 3391.2464, 3162.6005, 2982.6254, 2801.3341, 2627.5678] +25-08-31 05:26:07 | D | - best error = [ 3391.2464, 3162.6005, 2982.6254, 2801.3341, 2627.5678] +25-08-31 05:26:07 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:26:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:26:07 | D | - sum error = [ 2497.0686, 2355.2225, 2234.5492, 2124.8265, 2037.1479] +25-08-31 05:26:07 | D | - best error = [ 2497.0686, 2355.2225, 2234.5492, 2124.8265, 2037.1479] +25-08-31 05:26:07 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:26:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:26:07 | D | - sum error = [ 1969.0941, 1925.7020, 1899.8843, 1883.9468, 1869.7004] +25-08-31 05:26:07 | D | - best error = [ 1969.0941, 1925.7020, 1899.8843, 1883.9468, 1869.7004] +25-08-31 05:26:07 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:26:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:26:07 | D | - sum error = [ 1875.1315, 1907.5514, 1946.1448, 1995.3357, 2049.2976] +25-08-31 05:26:07 | D | - best error = [ 1869.7004, 1869.7004, 1869.7004, 1869.7004, 1869.7004] +25-08-31 05:26:07 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:26:07 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:26:07 | D | - sum error = [ 3594.6149, 3355.6962, 3124.6904, 2912.0537, 2717.8051] +25-08-31 05:26:07 | D | - best error = [ 1869.7004, 1869.7004, 1869.7004, 1869.7004, 1869.7004] +25-08-31 05:26:07 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:26:07 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:26:07 | D | - sum error = [ 2548.6282, 2383.3100, 2249.6374, 2143.8453, 2045.8687] +25-08-31 05:26:07 | D | - best error = [ 1869.7004, 1869.7004, 1869.7004, 1869.7004, 1869.7004] +25-08-31 05:26:07 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:26:07 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:26:07 | D | - sum error = [ 1977.1533, 1925.6119, 1897.9094, 1875.5109, 1887.5097] +25-08-31 05:26:07 | D | - best error = [ 1869.7004, 1869.7004, 1869.7004, 1869.7004, 1869.7004] +25-08-31 05:26:07 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:26:07 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:26:07 | D | - sum error = [ 1900.9853, 1932.9823, 1960.5501, 2043.3462] +25-08-31 05:26:07 | D | - best error = [ 1869.7004, 1869.7004, 1869.7004, 1869.7004] +25-08-31 05:26:07 | D | + error = 1869.7004 +25-08-31 05:26:07 | D | + scale = [min=0.2631, max=16.2124] +25-08-31 05:26:07 | D | - transformer_blocks.14.ff_context.down_proj +25-08-31 05:26:07 | D | + w: sfp4_e2m1_all +25-08-31 05:26:07 | D | + x: sfp4_e2m1_all +25-08-31 05:26:07 | D | + y: None +25-08-31 05:26:07 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:26:07 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:26:07 | D | + x - AbsMax +25-08-31 05:26:07 | D | + x = [min=0.0000, max=37.0000] +25-08-31 05:26:07 | D | + w - AbsMax +25-08-31 05:26:07 | D | + w = [min=0.0298, max=0.7344] +25-08-31 05:26:07 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:26:08 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:26:55 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:26:55 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:26:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:26:55 | D | - sum error = [ 1781.2066, 1569.9696, 1450.2137, 1312.1950, 1220.6103] +25-08-31 05:26:55 | D | - best error = [ 1781.2066, 1569.9696, 1450.2137, 1312.1950, 1220.6103] +25-08-31 05:26:55 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:26:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:26:55 | D | - sum error = [ 1154.7516, 1076.5945, 1034.8832, 1004.1309, 970.6814] +25-08-31 05:26:55 | D | - best error = [ 1154.7516, 1076.5945, 1034.8832, 1004.1309, 970.6814] +25-08-31 05:26:55 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:26:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:26:55 | D | - sum error = [ 916.8629, 898.7631, 875.0000, 862.9145, 868.3889] +25-08-31 05:26:55 | D | - best error = [ 916.8629, 898.7631, 875.0000, 862.9145, 862.9145] +25-08-31 05:26:55 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:26:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:26:55 | D | - sum error = [ 831.7732, 802.7092, 813.2580, 813.1942, 812.3815] +25-08-31 05:26:55 | D | - best error = [ 831.7732, 802.7092, 802.7092, 802.7092, 802.7092] +25-08-31 05:26:55 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:26:55 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:26:55 | D | - sum error = [ 1660.4181, 1442.6637, 1307.0228, 1226.9553, 1145.7200] +25-08-31 05:26:55 | D | - best error = [ 802.7092, 802.7092, 802.7092, 802.7092, 802.7092] +25-08-31 05:26:55 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:26:55 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:26:55 | D | - sum error = [ 1085.9068, 1025.9423, 1006.7902, 959.3210, 929.7843] +25-08-31 05:26:55 | D | - best error = [ 802.7092, 802.7092, 802.7092, 802.7092, 802.7092] +25-08-31 05:26:55 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:26:55 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:26:55 | D | - sum error = [ 898.8467, 882.4019, 852.4845, 829.6546, 847.8019] +25-08-31 05:26:55 | D | - best error = [ 802.7092, 802.7092, 802.7092, 802.7092, 802.7092] +25-08-31 05:26:55 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:26:55 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:26:55 | D | - sum error = [ 843.5239, 822.9357, 816.4282, 810.2107] +25-08-31 05:26:55 | D | - best error = [ 802.7092, 802.7092, 802.7092, 802.7092] +25-08-31 05:26:55 | D | + error = 802.7092 +25-08-31 05:26:55 | D | + scale = [min=0.0000, max=17.9706] +25-08-31 05:27:14 | D | - Smoothing Diffusion Block transformer_blocks.15 +25-08-31 05:27:14 | D | - Skipping Module transformer_blocks.15.norm1.linear +25-08-31 05:27:14 | D | - Skipping Module transformer_blocks.15.norm1_context.linear +25-08-31 05:27:14 | D | - Smoothing Transformer Block transformer_blocks.15 +25-08-31 05:27:14 | D | - transformer_blocks.15.attn.qkv_proj +25-08-31 05:27:14 | D | + w: sfp4_e2m1_all +25-08-31 05:27:14 | D | + x: sfp4_e2m1_all +25-08-31 05:27:14 | D | + y: None +25-08-31 05:27:14 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:27:14 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:27:14 | D | + x - AbsMax +25-08-31 05:27:14 | D | + x = [min=0.1445, max=13.5000] +25-08-31 05:27:14 | D | + w - AbsMax +25-08-31 05:27:14 | D | + w = [min=0.1250, max=0.6250] +25-08-31 05:27:14 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:27:15 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:28:28 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:28:28 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:28:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:28:28 | D | - sum error = [ 5769.1046, 5743.2862, 5716.9436, 5641.7799, 5600.0564] +25-08-31 05:28:28 | D | - best error = [ 5769.1046, 5743.2862, 5716.9436, 5641.7799, 5600.0564] +25-08-31 05:28:28 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:28:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:28:28 | D | - sum error = [ 5515.1020, 5432.4600, 5420.4173, 5440.3768, 5417.6443] +25-08-31 05:28:28 | D | - best error = [ 5515.1020, 5432.4600, 5420.4173, 5420.4173, 5417.6443] +25-08-31 05:28:28 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:28:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:28:28 | D | - sum error = [ 5484.1599, 5486.9653, 5490.1545, 5519.3016, 5584.8456] +25-08-31 05:28:28 | D | - best error = [ 5417.6443, 5417.6443, 5417.6443, 5417.6443, 5417.6443] +25-08-31 05:28:28 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:28:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:28:28 | D | - sum error = [ 5506.9472, 5498.1795, 5553.6251, 5603.7509, 5691.7075] +25-08-31 05:28:28 | D | - best error = [ 5417.6443, 5417.6443, 5417.6443, 5417.6443, 5417.6443] +25-08-31 05:28:28 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:28:28 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:28:28 | D | - sum error = [10310.8040, 9276.7822, 8531.8223, 7915.6731, 7588.4098] +25-08-31 05:28:28 | D | - best error = [ 5417.6443, 5417.6443, 5417.6443, 5417.6443, 5417.6443] +25-08-31 05:28:28 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:28:28 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:28:28 | D | - sum error = [ 7315.6949, 6987.6668, 6872.4153, 6552.9255, 6284.8096] +25-08-31 05:28:28 | D | - best error = [ 5417.6443, 5417.6443, 5417.6443, 5417.6443, 5417.6443] +25-08-31 05:28:28 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:28:28 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:28:28 | D | - sum error = [ 6075.5234, 6039.0218, 5897.3628, 5968.2862, 5832.3436] +25-08-31 05:28:28 | D | - best error = [ 5417.6443, 5417.6443, 5417.6443, 5417.6443, 5417.6443] +25-08-31 05:28:28 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:28:28 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:28:28 | D | - sum error = [ 5725.6683, 5747.8031, 5757.3959, 5738.9426] +25-08-31 05:28:28 | D | - best error = [ 5417.6443, 5417.6443, 5417.6443, 5417.6443] +25-08-31 05:28:28 | D | + error = 5417.6443 +25-08-31 05:28:28 | D | + scale = [min=0.4188, max=3.2259] +25-08-31 05:28:29 | D | - transformer_blocks.15.attn add_qkv_proj +25-08-31 05:28:29 | D | + w: sfp4_e2m1_all +25-08-31 05:28:29 | D | + x: sfp4_e2m1_all +25-08-31 05:28:29 | D | + y: None +25-08-31 05:28:29 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:28:29 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:28:29 | D | + x - AbsMax +25-08-31 05:28:29 | D | + x = [min=0.1270, max=22.8750] +25-08-31 05:28:29 | D | + w - AbsMax +25-08-31 05:28:29 | D | + w = [min=0.1094, max=0.5625] +25-08-31 05:28:29 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:28:30 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:29:32 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:29:32 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:29:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:29:32 | D | - sum error = [ 1224.0495, 1192.2256, 1164.1156, 1133.6945, 1102.5710] +25-08-31 05:29:32 | D | - best error = [ 1224.0495, 1192.2256, 1164.1156, 1133.6945, 1102.5710] +25-08-31 05:29:32 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:29:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:29:32 | D | - sum error = [ 1074.5838, 1033.3348, 1048.0937, 1036.0130, 1013.6799] +25-08-31 05:29:32 | D | - best error = [ 1074.5838, 1033.3348, 1033.3348, 1033.3348, 1013.6799] +25-08-31 05:29:32 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:29:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:29:32 | D | - sum error = [ 994.1969, 1005.7351, 999.2859, 990.3926, 1004.6718] +25-08-31 05:29:32 | D | - best error = [ 994.1969, 994.1969, 994.1969, 990.3926, 990.3926] +25-08-31 05:29:32 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:29:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:29:32 | D | - sum error = [ 996.7913, 1008.2823, 1042.9693, 1064.2213, 1086.8655] +25-08-31 05:29:32 | D | - best error = [ 990.3926, 990.3926, 990.3926, 990.3926, 990.3926] +25-08-31 05:29:32 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:29:32 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:29:32 | D | - sum error = [ 1472.5865, 1436.1737, 1340.9213, 1290.0264, 1232.6101] +25-08-31 05:29:32 | D | - best error = [ 990.3926, 990.3926, 990.3926, 990.3926, 990.3926] +25-08-31 05:29:32 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:29:32 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:29:32 | D | - sum error = [ 1207.4647, 1175.1810, 1132.3366, 1094.1304, 1049.6365] +25-08-31 05:29:32 | D | - best error = [ 990.3926, 990.3926, 990.3926, 990.3926, 990.3926] +25-08-31 05:29:32 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:29:32 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:29:32 | D | - sum error = [ 1046.4603, 1035.4538, 1027.2188, 1014.4856, 1008.6212] +25-08-31 05:29:32 | D | - best error = [ 990.3926, 990.3926, 990.3926, 990.3926, 990.3926] +25-08-31 05:29:32 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:29:32 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:29:32 | D | - sum error = [ 1020.2712, 1049.4829, 1060.1196, 1092.2800] +25-08-31 05:29:32 | D | - best error = [ 990.3926, 990.3926, 990.3926, 990.3926] +25-08-31 05:29:32 | D | + error = 990.3926 +25-08-31 05:29:32 | D | + scale = [min=0.2614, max=7.6486] +25-08-31 05:29:32 | D | - transformer_blocks.15.attn.out_proj + transformer_blocks.15.attn.add_out_proj +25-08-31 05:29:32 | D | + w: sfp4_e2m1_all +25-08-31 05:29:32 | D | + x: sfp4_e2m1_all +25-08-31 05:29:32 | D | + y: None +25-08-31 05:29:32 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:29:32 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:29:33 | D | + x - AbsMax +25-08-31 05:29:33 | D | + x = [min=3.2969, max=13.9375] +25-08-31 05:29:33 | D | + w - AbsMax +25-08-31 05:29:33 | D | + w = [min=0.1299, max=0.3633] +25-08-31 05:29:33 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:29:34 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:31:19 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:31:19 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:31:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:31:19 | D | - sum error = [ 7773.9442, 7770.8970, 7777.9067, 7782.7070, 7784.4270] +25-08-31 05:31:19 | D | - best error = [ 7773.9442, 7770.8970, 7770.8970, 7770.8970, 7770.8970] +25-08-31 05:31:19 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:31:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:31:19 | D | - sum error = [ 7766.2938, 7761.8998, 7766.4656, 7770.0965, 7775.9007] +25-08-31 05:31:19 | D | - best error = [ 7766.2938, 7761.8998, 7761.8998, 7761.8998, 7761.8998] +25-08-31 05:31:19 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:31:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:31:19 | D | - sum error = [ 7774.2704, 7794.6992, 7840.0168, 7818.8971, 7840.6104] +25-08-31 05:31:19 | D | - best error = [ 7761.8998, 7761.8998, 7761.8998, 7761.8998, 7761.8998] +25-08-31 05:31:19 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:31:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:31:19 | D | - sum error = [ 7835.7545, 7854.1803, 7862.5169, 7881.6179, 7882.7480] +25-08-31 05:31:19 | D | - best error = [ 7761.8998, 7761.8998, 7761.8998, 7761.8998, 7761.8998] +25-08-31 05:31:19 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:31:19 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:31:19 | D | - sum error = [ 7821.5093, 7812.9490, 7789.9266, 7804.4316, 7790.2416] +25-08-31 05:31:19 | D | - best error = [ 7761.8998, 7761.8998, 7761.8998, 7761.8998, 7761.8998] +25-08-31 05:31:19 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:31:19 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:31:19 | D | - sum error = [ 7800.0669, 7813.4336, 7811.5858, 7797.5165, 7807.7700] +25-08-31 05:31:19 | D | - best error = [ 7761.8998, 7761.8998, 7761.8998, 7761.8998, 7761.8998] +25-08-31 05:31:19 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:31:19 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:31:19 | D | - sum error = [ 7791.1316, 7819.7852, 7832.4612, 7837.4802, 7848.3947] +25-08-31 05:31:19 | D | - best error = [ 7761.8998, 7761.8998, 7761.8998, 7761.8998, 7761.8998] +25-08-31 05:31:19 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:31:19 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:31:19 | D | - sum error = [ 7859.1277, 7862.1941, 7875.3504, 7913.8365] +25-08-31 05:31:19 | D | - best error = [ 7761.8998, 7761.8998, 7761.8998, 7761.8998] +25-08-31 05:31:19 | D | + error = 7761.8998 +25-08-31 05:31:19 | D | + scale = [min=1.4303, max=2.2042] +25-08-31 05:31:19 | D | - transformer_blocks.15.ff.up_proj +25-08-31 05:31:19 | D | + w: sfp4_e2m1_all +25-08-31 05:31:19 | D | + x: sfp4_e2m1_all +25-08-31 05:31:19 | D | + y: None +25-08-31 05:31:19 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:31:19 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:31:20 | D | + x - AbsMax +25-08-31 05:31:20 | D | + x = [min=0.2256, max=5.5938] +25-08-31 05:31:20 | D | + w - AbsMax +25-08-31 05:31:20 | D | + w = [min=0.1030, max=0.4551] +25-08-31 05:31:20 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:31:21 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:32:36 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:32:36 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:32:36 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:32:36 | D | - sum error = [ 7861.2597, 7825.8301, 7804.9085, 7777.8596, 7750.5757] +25-08-31 05:32:36 | D | - best error = [ 7861.2597, 7825.8301, 7804.9085, 7777.8596, 7750.5757] +25-08-31 05:32:36 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:32:36 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:32:36 | D | - sum error = [ 7734.4918, 7709.8708, 7712.9724, 7698.1730, 7692.8242] +25-08-31 05:32:36 | D | - best error = [ 7734.4918, 7709.8708, 7709.8708, 7698.1730, 7692.8242] +25-08-31 05:32:36 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:32:36 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:32:36 | D | - sum error = [ 7703.8886, 7694.2573, 7697.6625, 7685.3112, 7727.2489] +25-08-31 05:32:36 | D | - best error = [ 7692.8242, 7692.8242, 7692.8242, 7685.3112, 7685.3112] +25-08-31 05:32:36 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:32:36 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:32:36 | D | - sum error = [ 7736.9120, 7753.8553, 7766.3927, 7807.9590, 7830.7782] +25-08-31 05:32:36 | D | - best error = [ 7685.3112, 7685.3112, 7685.3112, 7685.3112, 7685.3112] +25-08-31 05:32:36 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:32:36 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:32:36 | D | - sum error = [ 8049.1495, 7988.1723, 7943.6933, 7891.3459, 7848.6817] +25-08-31 05:32:36 | D | - best error = [ 7685.3112, 7685.3112, 7685.3112, 7685.3112, 7685.3112] +25-08-31 05:32:36 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:32:36 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:32:36 | D | - sum error = [ 7813.7225, 7783.2227, 7748.3436, 7729.5138, 7722.3401] +25-08-31 05:32:36 | D | - best error = [ 7685.3112, 7685.3112, 7685.3112, 7685.3112, 7685.3112] +25-08-31 05:32:36 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:32:36 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:32:36 | D | - sum error = [ 7716.1083, 7716.8512, 7714.5491, 7720.5565, 7726.0171] +25-08-31 05:32:36 | D | - best error = [ 7685.3112, 7685.3112, 7685.3112, 7685.3112, 7685.3112] +25-08-31 05:32:36 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:32:36 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:32:36 | D | - sum error = [ 7750.5326, 7774.7718, 7794.4891, 7822.5426] +25-08-31 05:32:36 | D | - best error = [ 7685.3112, 7685.3112, 7685.3112, 7685.3112] +25-08-31 05:32:36 | D | + error = 7685.3112 +25-08-31 05:32:36 | D | + scale = [min=0.3799, max=3.0620] +25-08-31 05:32:36 | D | - transformer_blocks.15.ff.down_proj +25-08-31 05:32:36 | D | + w: sfp4_e2m1_all +25-08-31 05:32:36 | D | + x: sfp4_e2m1_all +25-08-31 05:32:36 | D | + y: None +25-08-31 05:32:36 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:32:36 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:32:38 | D | + x - AbsMax +25-08-31 05:32:38 | D | + x = [min=0.0000, max=25.1250] +25-08-31 05:32:38 | D | + w - AbsMax +25-08-31 05:32:38 | D | + w = [min=0.0684, max=0.8633] +25-08-31 05:32:38 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:32:39 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:34:19 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:34:19 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:34:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:34:19 | D | - sum error = [ 6180.0945, 6178.7306, 6175.4206, 6189.2281, 6178.4717] +25-08-31 05:34:19 | D | - best error = [ 6180.0945, 6178.7306, 6175.4206, 6175.4206, 6175.4206] +25-08-31 05:34:19 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:34:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:34:19 | D | - sum error = [ 6182.5458, 6183.6080, 6205.7486, 6224.8174, 6237.7053] +25-08-31 05:34:19 | D | - best error = [ 6175.4206, 6175.4206, 6175.4206, 6175.4206, 6175.4206] +25-08-31 05:34:19 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:34:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:34:19 | D | - sum error = [ 6252.7575, 6277.4848, 6292.8896, 6317.4770, 6348.5885] +25-08-31 05:34:19 | D | - best error = [ 6175.4206, 6175.4206, 6175.4206, 6175.4206, 6175.4206] +25-08-31 05:34:19 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:34:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:34:19 | D | - sum error = [ 6377.8699, 6420.7096, 6442.1819, 6494.6495, 6538.9920] +25-08-31 05:34:19 | D | - best error = [ 6175.4206, 6175.4206, 6175.4206, 6175.4206, 6175.4206] +25-08-31 05:34:19 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:34:19 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:34:19 | D | - sum error = [ 6457.0749, 6404.3885, 6383.3336, 6340.2101, 6316.7378] +25-08-31 05:34:19 | D | - best error = [ 6175.4206, 6175.4206, 6175.4206, 6175.4206, 6175.4206] +25-08-31 05:34:19 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:34:19 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:34:19 | D | - sum error = [ 6294.4420, 6272.4168, 6267.5679, 6247.0870, 6232.9605] +25-08-31 05:34:19 | D | - best error = [ 6175.4206, 6175.4206, 6175.4206, 6175.4206, 6175.4206] +25-08-31 05:34:19 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:34:19 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:34:19 | D | - sum error = [ 6252.3871, 6254.1033, 6278.5814, 6295.1639, 6330.1409] +25-08-31 05:34:19 | D | - best error = [ 6175.4206, 6175.4206, 6175.4206, 6175.4206, 6175.4206] +25-08-31 05:34:19 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:34:19 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:34:19 | D | - sum error = [ 6368.1228, 6411.8575, 6464.1900, 6512.9514] +25-08-31 05:34:19 | D | - best error = [ 6175.4206, 6175.4206, 6175.4206, 6175.4206] +25-08-31 05:34:19 | D | + error = 6175.4206 +25-08-31 05:34:19 | D | + scale = [min=0.2079, max=1.3804] +25-08-31 05:34:19 | D | - transformer_blocks.15.ff_context.up_proj +25-08-31 05:34:19 | D | + w: sfp4_e2m1_all +25-08-31 05:34:19 | D | + x: sfp4_e2m1_all +25-08-31 05:34:19 | D | + y: None +25-08-31 05:34:19 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:34:19 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:34:19 | D | + x - AbsMax +25-08-31 05:34:19 | D | + x = [min=0.1885, max=43.7500] +25-08-31 05:34:19 | D | + w - AbsMax +25-08-31 05:34:19 | D | + w = [min=0.1025, max=0.5391] +25-08-31 05:34:19 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:34:20 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:35:03 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:35:03 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:35:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:35:03 | D | - sum error = [ 5085.0518, 4931.5286, 4817.5120, 4745.1173, 4655.0345] +25-08-31 05:35:03 | D | - best error = [ 5085.0518, 4931.5286, 4817.5120, 4745.1173, 4655.0345] +25-08-31 05:35:03 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:35:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:35:03 | D | - sum error = [ 4613.7009, 4548.4375, 4489.0237, 4445.8322, 4404.8743] +25-08-31 05:35:03 | D | - best error = [ 4613.7009, 4548.4375, 4489.0237, 4445.8322, 4404.8743] +25-08-31 05:35:03 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:35:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:35:03 | D | - sum error = [ 4386.1619, 4373.8866, 4344.1522, 4329.2960, 4342.4905] +25-08-31 05:35:03 | D | - best error = [ 4386.1619, 4373.8866, 4344.1522, 4329.2960, 4329.2960] +25-08-31 05:35:03 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:35:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:35:03 | D | - sum error = [ 4337.9440, 4382.9289, 4393.7676, 4413.7694, 4461.0687] +25-08-31 05:35:03 | D | - best error = [ 4329.2960, 4329.2960, 4329.2960, 4329.2960, 4329.2960] +25-08-31 05:35:03 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:35:03 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:35:03 | D | - sum error = [ 4985.4344, 4850.2701, 4727.3667, 4614.9509, 4541.4636] +25-08-31 05:35:03 | D | - best error = [ 4329.2960, 4329.2960, 4329.2960, 4329.2960, 4329.2960] +25-08-31 05:35:03 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:35:03 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:35:03 | D | - sum error = [ 4485.3332, 4446.8654, 4406.3023, 4370.9695, 4346.1783] +25-08-31 05:35:03 | D | - best error = [ 4329.2960, 4329.2960, 4329.2960, 4329.2960, 4329.2960] +25-08-31 05:35:03 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:35:03 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:35:03 | D | - sum error = [ 4341.1330, 4319.9488, 4334.0395, 4331.7576, 4334.7048] +25-08-31 05:35:03 | D | - best error = [ 4329.2960, 4319.9488, 4319.9488, 4319.9488, 4319.9488] +25-08-31 05:35:03 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:35:03 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:35:03 | D | - sum error = [ 4344.3598, 4374.6243, 4420.6000, 4449.6654] +25-08-31 05:35:03 | D | - best error = [ 4319.9488, 4319.9488, 4319.9488, 4319.9488] +25-08-31 05:35:03 | D | + error = 4319.9488 +25-08-31 05:35:03 | D | + scale = [min=0.7188, max=18.8798] +25-08-31 05:35:03 | D | - transformer_blocks.15.ff_context.down_proj +25-08-31 05:35:03 | D | + w: sfp4_e2m1_all +25-08-31 05:35:03 | D | + x: sfp4_e2m1_all +25-08-31 05:35:03 | D | + y: None +25-08-31 05:35:03 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:35:03 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:35:03 | D | + x - AbsMax +25-08-31 05:35:03 | D | + x = [min=0.0000, max=73.5000] +25-08-31 05:35:03 | D | + w - AbsMax +25-08-31 05:35:03 | D | + w = [min=0.0496, max=0.4648] +25-08-31 05:35:03 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:35:04 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:35:51 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:35:51 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:35:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:35:51 | D | - sum error = [ 1221.1511, 1188.9280, 1169.8955, 1147.6405, 1143.1670] +25-08-31 05:35:51 | D | - best error = [ 1221.1511, 1188.9280, 1169.8955, 1147.6405, 1143.1670] +25-08-31 05:35:51 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:35:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:35:51 | D | - sum error = [ 1119.1987, 1105.1290, 1107.6723, 1103.4859, 1094.0867] +25-08-31 05:35:51 | D | - best error = [ 1119.1987, 1105.1290, 1105.1290, 1103.4859, 1094.0867] +25-08-31 05:35:51 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:35:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:35:51 | D | - sum error = [ 1092.4281, 1089.8975, 1090.8026, 1091.3966, 1092.8333] +25-08-31 05:35:51 | D | - best error = [ 1092.4281, 1089.8975, 1089.8975, 1089.8975, 1089.8975] +25-08-31 05:35:51 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:35:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:35:51 | D | - sum error = [ 1100.7331, 1110.0071, 1126.8290, 1136.3006, 1149.6985] +25-08-31 05:35:51 | D | - best error = [ 1089.8975, 1089.8975, 1089.8975, 1089.8975, 1089.8975] +25-08-31 05:35:51 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:35:51 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:35:51 | D | - sum error = [ 1258.0774, 1232.3499, 1215.8327, 1200.5228, 1176.3585] +25-08-31 05:35:51 | D | - best error = [ 1089.8975, 1089.8975, 1089.8975, 1089.8975, 1089.8975] +25-08-31 05:35:51 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:35:51 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:35:51 | D | - sum error = [ 1173.3504, 1162.0555, 1150.1361, 1145.3641, 1136.3868] +25-08-31 05:35:51 | D | - best error = [ 1089.8975, 1089.8975, 1089.8975, 1089.8975, 1089.8975] +25-08-31 05:35:51 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:35:51 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:35:51 | D | - sum error = [ 1142.8609, 1137.8839, 1135.7606, 1136.9837, 1140.7476] +25-08-31 05:35:51 | D | - best error = [ 1089.8975, 1089.8975, 1089.8975, 1089.8975, 1089.8975] +25-08-31 05:35:51 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:35:51 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:35:51 | D | - sum error = [ 1145.6132, 1146.1194, 1152.6753, 1165.0076] +25-08-31 05:35:51 | D | - best error = [ 1089.8975, 1089.8975, 1089.8975, 1089.8975] +25-08-31 05:35:51 | D | + error = 1089.8975 +25-08-31 05:35:51 | D | + scale = [min=0.0002, max=10.6282] +25-08-31 05:36:09 | D | - Smoothing Diffusion Block transformer_blocks.16 +25-08-31 05:36:09 | D | - Skipping Module transformer_blocks.16.norm1.linear +25-08-31 05:36:09 | D | - Skipping Module transformer_blocks.16.norm1_context.linear +25-08-31 05:36:09 | D | - Smoothing Transformer Block transformer_blocks.16 +25-08-31 05:36:09 | D | - transformer_blocks.16.attn.qkv_proj +25-08-31 05:36:09 | D | + w: sfp4_e2m1_all +25-08-31 05:36:09 | D | + x: sfp4_e2m1_all +25-08-31 05:36:09 | D | + y: None +25-08-31 05:36:09 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:36:09 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:36:10 | D | + x - AbsMax +25-08-31 05:36:10 | D | + x = [min=0.1846, max=22.0000] +25-08-31 05:36:10 | D | + w - AbsMax +25-08-31 05:36:10 | D | + w = [min=0.1069, max=0.6836] +25-08-31 05:36:10 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:36:11 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:37:24 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:37:24 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:37:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:37:24 | D | - sum error = [ 6681.3083, 6429.5225, 6420.5065, 6374.5789, 6349.6104] +25-08-31 05:37:24 | D | - best error = [ 6681.3083, 6429.5225, 6420.5065, 6374.5789, 6349.6104] +25-08-31 05:37:24 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:37:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:37:24 | D | - sum error = [ 6286.1553, 6269.0883, 6161.0071, 6150.7297, 6228.7305] +25-08-31 05:37:24 | D | - best error = [ 6286.1553, 6269.0883, 6161.0071, 6150.7297, 6150.7297] +25-08-31 05:37:24 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:37:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:37:24 | D | - sum error = [ 6183.2312, 6087.4589, 6094.4329, 6118.3596, 6037.5445] +25-08-31 05:37:24 | D | - best error = [ 6150.7297, 6087.4589, 6087.4589, 6087.4589, 6037.5445] +25-08-31 05:37:24 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:37:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:37:24 | D | - sum error = [ 6030.4534, 6070.8223, 6135.8778, 6141.7548, 6191.4541] +25-08-31 05:37:24 | D | - best error = [ 6030.4534, 6030.4534, 6030.4534, 6030.4534, 6030.4534] +25-08-31 05:37:24 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:37:24 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:37:24 | D | - sum error = [11702.6294, 10716.6271, 9733.4475, 9010.1925, 8472.3012] +25-08-31 05:37:24 | D | - best error = [ 6030.4534, 6030.4534, 6030.4534, 6030.4534, 6030.4534] +25-08-31 05:37:24 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:37:24 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:37:24 | D | - sum error = [ 8096.6739, 7795.8615, 7542.3185, 7089.9069, 6986.1280] +25-08-31 05:37:24 | D | - best error = [ 6030.4534, 6030.4534, 6030.4534, 6030.4534, 6030.4534] +25-08-31 05:37:24 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:37:24 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:37:24 | D | - sum error = [ 6870.8649, 6652.0640, 6572.9735, 6402.5935, 6377.5017] +25-08-31 05:37:24 | D | - best error = [ 6030.4534, 6030.4534, 6030.4534, 6030.4534, 6030.4534] +25-08-31 05:37:24 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:37:24 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:37:24 | D | - sum error = [ 6290.8104, 6212.8254, 6251.7492, 6254.6332] +25-08-31 05:37:24 | D | - best error = [ 6030.4534, 6030.4534, 6030.4534, 6030.4534] +25-08-31 05:37:24 | D | + error = 6030.4534 +25-08-31 05:37:24 | D | + scale = [min=0.2816, max=10.1582] +25-08-31 05:37:24 | D | - transformer_blocks.16.attn add_qkv_proj +25-08-31 05:37:24 | D | + w: sfp4_e2m1_all +25-08-31 05:37:24 | D | + x: sfp4_e2m1_all +25-08-31 05:37:24 | D | + y: None +25-08-31 05:37:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:37:24 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:37:24 | D | + x - AbsMax +25-08-31 05:37:24 | D | + x = [min=0.2090, max=25.7500] +25-08-31 05:37:24 | D | + w - AbsMax +25-08-31 05:37:24 | D | + w = [min=0.1079, max=0.5859] +25-08-31 05:37:24 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:37:25 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:38:28 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:38:28 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:38:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:38:28 | D | - sum error = [ 1352.8552, 1333.8161, 1346.2954, 1304.9887, 1262.0238] +25-08-31 05:38:28 | D | - best error = [ 1352.8552, 1333.8161, 1333.8161, 1304.9887, 1262.0238] +25-08-31 05:38:28 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:38:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:38:28 | D | - sum error = [ 1240.1117, 1224.3073, 1217.8709, 1209.5387, 1206.3758] +25-08-31 05:38:28 | D | - best error = [ 1240.1117, 1224.3073, 1217.8709, 1209.5387, 1206.3758] +25-08-31 05:38:28 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:38:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:38:28 | D | - sum error = [ 1205.4618, 1195.8979, 1189.2949, 1182.9842, 1192.3054] +25-08-31 05:38:28 | D | - best error = [ 1205.4618, 1195.8979, 1189.2949, 1182.9842, 1182.9842] +25-08-31 05:38:28 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:38:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:38:28 | D | - sum error = [ 1198.4996, 1215.6645, 1239.5861, 1267.1691, 1314.2341] +25-08-31 05:38:28 | D | - best error = [ 1182.9842, 1182.9842, 1182.9842, 1182.9842, 1182.9842] +25-08-31 05:38:28 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:38:28 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:38:28 | D | - sum error = [ 1763.2418, 1660.0200, 1592.9477, 1571.5957, 1445.4888] +25-08-31 05:38:28 | D | - best error = [ 1182.9842, 1182.9842, 1182.9842, 1182.9842, 1182.9842] +25-08-31 05:38:28 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:38:28 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:38:28 | D | - sum error = [ 1431.0016, 1380.9784, 1301.4410, 1255.9999, 1249.5628] +25-08-31 05:38:28 | D | - best error = [ 1182.9842, 1182.9842, 1182.9842, 1182.9842, 1182.9842] +25-08-31 05:38:28 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:38:28 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:38:28 | D | - sum error = [ 1220.5803, 1243.8083, 1226.3581, 1235.6420, 1197.4389] +25-08-31 05:38:28 | D | - best error = [ 1182.9842, 1182.9842, 1182.9842, 1182.9842, 1182.9842] +25-08-31 05:38:28 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:38:28 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:38:28 | D | - sum error = [ 1219.0849, 1222.7167, 1264.9130, 1284.7637] +25-08-31 05:38:28 | D | - best error = [ 1182.9842, 1182.9842, 1182.9842, 1182.9842] +25-08-31 05:38:28 | D | + error = 1182.9842 +25-08-31 05:38:28 | D | + scale = [min=0.3615, max=8.2605] +25-08-31 05:38:28 | D | - transformer_blocks.16.attn.out_proj + transformer_blocks.16.attn.add_out_proj +25-08-31 05:38:28 | D | + w: sfp4_e2m1_all +25-08-31 05:38:28 | D | + x: sfp4_e2m1_all +25-08-31 05:38:28 | D | + y: None +25-08-31 05:38:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:38:28 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:38:28 | D | + x - AbsMax +25-08-31 05:38:28 | D | + x = [min=3.8125, max=20.5000] +25-08-31 05:38:28 | D | + w - AbsMax +25-08-31 05:38:28 | D | + w = [min=0.1289, max=0.3145] +25-08-31 05:38:28 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:38:29 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:40:14 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:40:14 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:40:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:40:14 | D | - sum error = [ 9089.9078, 9087.6003, 9086.5188, 9097.1319, 9089.7349] +25-08-31 05:40:14 | D | - best error = [ 9089.9078, 9087.6003, 9086.5188, 9086.5188, 9086.5188] +25-08-31 05:40:14 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:40:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:40:14 | D | - sum error = [ 9094.8869, 9100.8923, 9116.1009, 9138.5579, 9146.4118] +25-08-31 05:40:14 | D | - best error = [ 9086.5188, 9086.5188, 9086.5188, 9086.5188, 9086.5188] +25-08-31 05:40:14 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:40:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:40:14 | D | - sum error = [ 9136.5280, 9120.8355, 9160.7131, 9168.7286, 9184.6903] +25-08-31 05:40:14 | D | - best error = [ 9086.5188, 9086.5188, 9086.5188, 9086.5188, 9086.5188] +25-08-31 05:40:14 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:40:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:40:14 | D | - sum error = [ 9178.2421, 9174.0460, 9196.1740, 9197.2309, 9212.5263] +25-08-31 05:40:14 | D | - best error = [ 9086.5188, 9086.5188, 9086.5188, 9086.5188, 9086.5188] +25-08-31 05:40:14 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:40:14 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:40:14 | D | - sum error = [ 9174.9795, 9178.1270, 9168.2360, 9143.9781, 9152.0817] +25-08-31 05:40:14 | D | - best error = [ 9086.5188, 9086.5188, 9086.5188, 9086.5188, 9086.5188] +25-08-31 05:40:14 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:40:14 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:40:14 | D | - sum error = [ 9129.3680, 9136.0959, 9127.2331, 9141.7565, 9137.5866] +25-08-31 05:40:14 | D | - best error = [ 9086.5188, 9086.5188, 9086.5188, 9086.5188, 9086.5188] +25-08-31 05:40:14 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:40:14 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:40:14 | D | - sum error = [ 9127.6902, 9160.8567, 9145.3210, 9176.1351, 9187.5651] +25-08-31 05:40:14 | D | - best error = [ 9086.5188, 9086.5188, 9086.5188, 9086.5188, 9086.5188] +25-08-31 05:40:14 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:40:14 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:40:14 | D | - sum error = [ 9206.7045, 9228.9605, 9211.3995, 9215.0846] +25-08-31 05:40:14 | D | - best error = [ 9086.5188, 9086.5188, 9086.5188, 9086.5188] +25-08-31 05:40:14 | D | + error = 9086.5188 +25-08-31 05:40:14 | D | + scale = [min=1.1432, max=1.3526] +25-08-31 05:40:14 | D | - transformer_blocks.16.ff.up_proj +25-08-31 05:40:14 | D | + w: sfp4_e2m1_all +25-08-31 05:40:14 | D | + x: sfp4_e2m1_all +25-08-31 05:40:14 | D | + y: None +25-08-31 05:40:14 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:40:14 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:40:15 | D | + x - AbsMax +25-08-31 05:40:15 | D | + x = [min=0.1777, max=6.4375] +25-08-31 05:40:15 | D | + w - AbsMax +25-08-31 05:40:15 | D | + w = [min=0.1094, max=0.6680] +25-08-31 05:40:15 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:40:16 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:41:30 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:41:30 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:41:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:41:30 | D | - sum error = [ 7590.7267, 7545.0731, 7499.7032, 7454.8685, 7416.8074] +25-08-31 05:41:30 | D | - best error = [ 7590.7267, 7545.0731, 7499.7032, 7454.8685, 7416.8074] +25-08-31 05:41:30 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:41:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:41:30 | D | - sum error = [ 7397.6526, 7373.0998, 7353.2673, 7335.5643, 7336.9067] +25-08-31 05:41:30 | D | - best error = [ 7397.6526, 7373.0998, 7353.2673, 7335.5643, 7335.5643] +25-08-31 05:41:30 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:41:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:41:30 | D | - sum error = [ 7328.4697, 7335.8822, 7324.9030, 7325.7406, 7328.8853] +25-08-31 05:41:30 | D | - best error = [ 7328.4697, 7328.4697, 7324.9030, 7324.9030, 7324.9030] +25-08-31 05:41:30 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:41:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:41:30 | D | - sum error = [ 7344.4608, 7364.2968, 7384.2635, 7403.2093, 7429.9906] +25-08-31 05:41:30 | D | - best error = [ 7324.9030, 7324.9030, 7324.9030, 7324.9030, 7324.9030] +25-08-31 05:41:30 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:41:30 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:41:30 | D | - sum error = [ 7694.8154, 7625.8374, 7562.6762, 7516.7131, 7480.1214] +25-08-31 05:41:30 | D | - best error = [ 7324.9030, 7324.9030, 7324.9030, 7324.9030, 7324.9030] +25-08-31 05:41:30 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:41:30 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:41:30 | D | - sum error = [ 7441.6119, 7417.8962, 7392.2449, 7367.5521, 7354.9140] +25-08-31 05:41:30 | D | - best error = [ 7324.9030, 7324.9030, 7324.9030, 7324.9030, 7324.9030] +25-08-31 05:41:30 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:41:30 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:41:30 | D | - sum error = [ 7341.2912, 7338.6566, 7342.6644, 7339.8640, 7355.9127] +25-08-31 05:41:30 | D | - best error = [ 7324.9030, 7324.9030, 7324.9030, 7324.9030, 7324.9030] +25-08-31 05:41:30 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:41:30 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:41:30 | D | - sum error = [ 7362.0774, 7383.3445, 7402.7833, 7431.5105] +25-08-31 05:41:30 | D | - best error = [ 7324.9030, 7324.9030, 7324.9030, 7324.9030] +25-08-31 05:41:30 | D | + error = 7324.9030 +25-08-31 05:41:30 | D | + scale = [min=0.3547, max=3.0565] +25-08-31 05:41:30 | D | - transformer_blocks.16.ff.down_proj +25-08-31 05:41:30 | D | + w: sfp4_e2m1_all +25-08-31 05:41:30 | D | + x: sfp4_e2m1_all +25-08-31 05:41:30 | D | + y: None +25-08-31 05:41:30 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:41:30 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:41:32 | D | + x - AbsMax +25-08-31 05:41:32 | D | + x = [min=0.0000, max=31.3750] +25-08-31 05:41:32 | D | + w - AbsMax +25-08-31 05:41:32 | D | + w = [min=0.0684, max=1.0547] +25-08-31 05:41:32 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:41:34 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:43:22 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:43:22 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:43:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:43:22 | D | - sum error = [ 6350.9288, 6365.5705, 6470.2847, 6335.3573, 6344.8811] +25-08-31 05:43:22 | D | - best error = [ 6350.9288, 6350.9288, 6350.9288, 6335.3573, 6335.3573] +25-08-31 05:43:22 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:43:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:43:22 | D | - sum error = [ 6354.2131, 6370.0471, 6380.0229, 6395.1144, 6406.3044] +25-08-31 05:43:22 | D | - best error = [ 6335.3573, 6335.3573, 6335.3573, 6335.3573, 6335.3573] +25-08-31 05:43:22 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:43:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:43:22 | D | - sum error = [ 6420.3769, 6451.8506, 6474.4714, 6492.2569, 6531.1065] +25-08-31 05:43:22 | D | - best error = [ 6335.3573, 6335.3573, 6335.3573, 6335.3573, 6335.3573] +25-08-31 05:43:22 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:43:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:43:22 | D | - sum error = [ 6564.3004, 6607.7912, 6646.1333, 6701.5541, 6772.1037] +25-08-31 05:43:22 | D | - best error = [ 6335.3573, 6335.3573, 6335.3573, 6335.3573, 6335.3573] +25-08-31 05:43:22 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:43:22 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:43:22 | D | - sum error = [ 6605.4290, 6566.8827, 6542.9153, 6502.6518, 6474.8220] +25-08-31 05:43:22 | D | - best error = [ 6335.3573, 6335.3573, 6335.3573, 6335.3573, 6335.3573] +25-08-31 05:43:22 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:43:22 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:43:22 | D | - sum error = [ 6457.5066, 6431.2848, 6415.4096, 6403.7024, 6527.3722] +25-08-31 05:43:22 | D | - best error = [ 6335.3573, 6335.3573, 6335.3573, 6335.3573, 6335.3573] +25-08-31 05:43:22 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:43:22 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:43:22 | D | - sum error = [ 6406.0843, 6424.8903, 6439.5723, 6466.7602, 6521.2186] +25-08-31 05:43:22 | D | - best error = [ 6335.3573, 6335.3573, 6335.3573, 6335.3573, 6335.3573] +25-08-31 05:43:22 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:43:22 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:43:22 | D | - sum error = [ 6564.8674, 6607.5118, 6664.4748, 6734.0413] +25-08-31 05:43:22 | D | - best error = [ 6335.3573, 6335.3573, 6335.3573, 6335.3573] +25-08-31 05:43:22 | D | + error = 6335.3573 +25-08-31 05:43:22 | D | + scale = [min=0.1360, max=1.6768] +25-08-31 05:43:22 | D | - transformer_blocks.16.ff_context.up_proj +25-08-31 05:43:22 | D | + w: sfp4_e2m1_all +25-08-31 05:43:22 | D | + x: sfp4_e2m1_all +25-08-31 05:43:22 | D | + y: None +25-08-31 05:43:22 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:43:22 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:43:22 | D | + x - AbsMax +25-08-31 05:43:22 | D | + x = [min=0.2480, max=39.5000] +25-08-31 05:43:22 | D | + w - AbsMax +25-08-31 05:43:22 | D | + w = [min=0.1152, max=1.0391] +25-08-31 05:43:22 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:43:22 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:44:05 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:44:05 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:44:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:44:05 | D | - sum error = [ 5953.3806, 5809.5225, 5731.0739, 5609.3543, 5523.2308] +25-08-31 05:44:05 | D | - best error = [ 5953.3806, 5809.5225, 5731.0739, 5609.3543, 5523.2308] +25-08-31 05:44:05 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:44:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:44:05 | D | - sum error = [ 5467.1625, 5420.0190, 5335.2340, 5328.6439, 5288.2154] +25-08-31 05:44:05 | D | - best error = [ 5467.1625, 5420.0190, 5335.2340, 5328.6439, 5288.2154] +25-08-31 05:44:05 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:44:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:44:05 | D | - sum error = [ 5248.0166, 5239.7872, 5237.9632, 5220.0967, 5230.4872] +25-08-31 05:44:05 | D | - best error = [ 5248.0166, 5239.7872, 5237.9632, 5220.0967, 5220.0967] +25-08-31 05:44:05 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:44:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:44:05 | D | - sum error = [ 5222.4044, 5252.1618, 5271.4121, 5273.8030, 5316.6083] +25-08-31 05:44:05 | D | - best error = [ 5220.0967, 5220.0967, 5220.0967, 5220.0967, 5220.0967] +25-08-31 05:44:05 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:44:05 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:44:05 | D | - sum error = [ 5962.5806, 5820.7082, 5734.8728, 5646.2545, 5514.5357] +25-08-31 05:44:05 | D | - best error = [ 5220.0967, 5220.0967, 5220.0967, 5220.0967, 5220.0967] +25-08-31 05:44:05 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:44:05 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:44:05 | D | - sum error = [ 5428.4305, 5356.4600, 5312.3114, 5281.0643, 5221.3954] +25-08-31 05:44:05 | D | - best error = [ 5220.0967, 5220.0967, 5220.0967, 5220.0967, 5220.0967] +25-08-31 05:44:05 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:44:05 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:44:05 | D | - sum error = [ 5226.0578, 5180.0299, 5193.1297, 5220.3270, 5206.8393] +25-08-31 05:44:05 | D | - best error = [ 5220.0967, 5180.0299, 5180.0299, 5180.0299, 5180.0299] +25-08-31 05:44:05 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:44:05 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:44:05 | D | - sum error = [ 5208.6056, 5244.9705, 5263.8955, 5286.1075] +25-08-31 05:44:05 | D | - best error = [ 5180.0299, 5180.0299, 5180.0299, 5180.0299] +25-08-31 05:44:05 | D | + error = 5180.0299 +25-08-31 05:44:05 | D | + scale = [min=0.7508, max=13.6101] +25-08-31 05:44:05 | D | - transformer_blocks.16.ff_context.down_proj +25-08-31 05:44:05 | D | + w: sfp4_e2m1_all +25-08-31 05:44:05 | D | + x: sfp4_e2m1_all +25-08-31 05:44:05 | D | + y: None +25-08-31 05:44:05 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:44:05 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:44:05 | D | + x - AbsMax +25-08-31 05:44:05 | D | + x = [min=0.0000, max=83.0000] +25-08-31 05:44:05 | D | + w - AbsMax +25-08-31 05:44:05 | D | + w = [min=0.0320, max=0.7188] +25-08-31 05:44:05 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:44:06 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:44:54 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:44:54 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:44:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:44:54 | D | - sum error = [ 1269.7824, 1194.0139, 1162.2981, 1123.5453, 1102.1212] +25-08-31 05:44:54 | D | - best error = [ 1269.7824, 1194.0139, 1162.2981, 1123.5453, 1102.1212] +25-08-31 05:44:54 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:44:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:44:54 | D | - sum error = [ 1070.1045, 1051.6082, 1036.7444, 1014.8274, 1010.8595] +25-08-31 05:44:54 | D | - best error = [ 1070.1045, 1051.6082, 1036.7444, 1014.8274, 1010.8595] +25-08-31 05:44:54 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:44:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:44:54 | D | - sum error = [ 1001.0775, 993.5441, 996.1282, 985.4779, 987.3692] +25-08-31 05:44:54 | D | - best error = [ 1001.0775, 993.5441, 993.5441, 985.4779, 985.4779] +25-08-31 05:44:54 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:44:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:44:54 | D | - sum error = [ 989.9243, 992.7738, 1000.1925, 1017.0749, 1035.5056] +25-08-31 05:44:54 | D | - best error = [ 985.4779, 985.4779, 985.4779, 985.4779, 985.4779] +25-08-31 05:44:54 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:44:54 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:44:54 | D | - sum error = [ 1284.6014, 1237.3652, 1203.4307, 1161.4540, 1139.9635] +25-08-31 05:44:54 | D | - best error = [ 985.4779, 985.4779, 985.4779, 985.4779, 985.4779] +25-08-31 05:44:54 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:44:54 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:44:54 | D | - sum error = [ 1113.6157, 1090.1494, 1072.6844, 1061.9885, 1044.8062] +25-08-31 05:44:54 | D | - best error = [ 985.4779, 985.4779, 985.4779, 985.4779, 985.4779] +25-08-31 05:44:54 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:44:54 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:44:54 | D | - sum error = [ 1036.9166, 1026.8678, 1028.3504, 1024.1075, 1022.7798] +25-08-31 05:44:54 | D | - best error = [ 985.4779, 985.4779, 985.4779, 985.4779, 985.4779] +25-08-31 05:44:54 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:44:54 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:44:54 | D | - sum error = [ 1021.4825, 1025.5821, 1032.9447, 1039.3972] +25-08-31 05:44:54 | D | - best error = [ 985.4779, 985.4779, 985.4779, 985.4779] +25-08-31 05:44:54 | D | + error = 985.4779 +25-08-31 05:44:54 | D | + scale = [min=0.0000, max=17.6767] +25-08-31 05:45:12 | D | - Smoothing Diffusion Block transformer_blocks.17 +25-08-31 05:45:12 | D | - Skipping Module transformer_blocks.17.norm1.linear +25-08-31 05:45:12 | D | - Skipping Module transformer_blocks.17.norm1_context.linear +25-08-31 05:45:12 | D | - Smoothing Transformer Block transformer_blocks.17 +25-08-31 05:45:12 | D | - transformer_blocks.17.attn.qkv_proj +25-08-31 05:45:12 | D | + w: sfp4_e2m1_all +25-08-31 05:45:12 | D | + x: sfp4_e2m1_all +25-08-31 05:45:12 | D | + y: None +25-08-31 05:45:12 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:45:12 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:45:13 | D | + x - AbsMax +25-08-31 05:45:13 | D | + x = [min=0.2637, max=19.0000] +25-08-31 05:45:13 | D | + w - AbsMax +25-08-31 05:45:13 | D | + w = [min=0.1108, max=0.8633] +25-08-31 05:45:13 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:45:14 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:46:30 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:46:30 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:46:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:46:30 | D | - sum error = [ 7189.9001, 7092.1994, 7114.1369, 6957.6605, 6988.9796] +25-08-31 05:46:30 | D | - best error = [ 7189.9001, 7092.1994, 7092.1994, 6957.6605, 6957.6605] +25-08-31 05:46:30 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:46:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:46:30 | D | - sum error = [ 6950.0958, 6912.1950, 6813.7461, 6882.6236, 6862.6598] +25-08-31 05:46:30 | D | - best error = [ 6950.0958, 6912.1950, 6813.7461, 6813.7461, 6813.7461] +25-08-31 05:46:30 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:46:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:46:30 | D | - sum error = [ 6823.2995, 6806.4302, 6843.1557, 6839.4257, 6853.1864] +25-08-31 05:46:30 | D | - best error = [ 6813.7461, 6806.4302, 6806.4302, 6806.4302, 6806.4302] +25-08-31 05:46:30 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:46:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:46:30 | D | - sum error = [ 6888.9451, 6938.8593, 6936.4838, 6926.5792, 6928.3666] +25-08-31 05:46:30 | D | - best error = [ 6806.4302, 6806.4302, 6806.4302, 6806.4302, 6806.4302] +25-08-31 05:46:30 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:46:30 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:46:30 | D | - sum error = [12851.0457, 11919.7879, 11054.3913, 10306.8765, 9832.4080] +25-08-31 05:46:30 | D | - best error = [ 6806.4302, 6806.4302, 6806.4302, 6806.4302, 6806.4302] +25-08-31 05:46:30 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:46:30 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:46:30 | D | - sum error = [ 9233.8079, 8901.5765, 8561.4531, 8423.9687, 8173.8901] +25-08-31 05:46:30 | D | - best error = [ 6806.4302, 6806.4302, 6806.4302, 6806.4302, 6806.4302] +25-08-31 05:46:30 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:46:30 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:46:30 | D | - sum error = [ 7895.4719, 7563.0941, 7404.9867, 7238.9939, 7211.1491] +25-08-31 05:46:30 | D | - best error = [ 6806.4302, 6806.4302, 6806.4302, 6806.4302, 6806.4302] +25-08-31 05:46:30 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:46:30 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:46:30 | D | - sum error = [ 7201.4221, 7097.7737, 6990.5150, 7017.8963] +25-08-31 05:46:30 | D | - best error = [ 6806.4302, 6806.4302, 6806.4302, 6806.4302] +25-08-31 05:46:30 | D | + error = 6806.4302 +25-08-31 05:46:30 | D | + scale = [min=0.4804, max=5.0503] +25-08-31 05:46:30 | D | - transformer_blocks.17.attn add_qkv_proj +25-08-31 05:46:30 | D | + w: sfp4_e2m1_all +25-08-31 05:46:30 | D | + x: sfp4_e2m1_all +25-08-31 05:46:30 | D | + y: None +25-08-31 05:46:30 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:46:30 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:46:31 | D | + x - AbsMax +25-08-31 05:46:31 | D | + x = [min=0.2266, max=35.5000] +25-08-31 05:46:31 | D | + w - AbsMax +25-08-31 05:46:31 | D | + w = [min=0.1152, max=0.6797] +25-08-31 05:46:31 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:46:31 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:47:39 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:47:39 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:47:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:47:39 | D | - sum error = [ 1445.3286, 1403.7587, 1401.3407, 1359.0838, 1346.5847] +25-08-31 05:47:39 | D | - best error = [ 1445.3286, 1403.7587, 1401.3407, 1359.0838, 1346.5847] +25-08-31 05:47:39 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:47:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:47:39 | D | - sum error = [ 1345.4181, 1303.3552, 1314.7058, 1304.3742, 1304.7697] +25-08-31 05:47:39 | D | - best error = [ 1345.4181, 1303.3552, 1303.3552, 1303.3552, 1303.3552] +25-08-31 05:47:39 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:47:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:47:39 | D | - sum error = [ 1263.1709, 1281.0552, 1275.5643, 1304.7958, 1305.8359] +25-08-31 05:47:39 | D | - best error = [ 1263.1709, 1263.1709, 1263.1709, 1263.1709, 1263.1709] +25-08-31 05:47:39 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:47:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:47:39 | D | - sum error = [ 1315.8964, 1340.5719, 1333.6259, 1328.2681, 1345.4371] +25-08-31 05:47:39 | D | - best error = [ 1263.1709, 1263.1709, 1263.1709, 1263.1709, 1263.1709] +25-08-31 05:47:39 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:47:39 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:47:39 | D | - sum error = [ 1844.1264, 1774.6298, 1734.0730, 1655.2452, 1596.2754] +25-08-31 05:47:39 | D | - best error = [ 1263.1709, 1263.1709, 1263.1709, 1263.1709, 1263.1709] +25-08-31 05:47:39 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:47:39 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:47:39 | D | - sum error = [ 1564.5965, 1515.7501, 1440.3175, 1423.1741, 1441.3586] +25-08-31 05:47:39 | D | - best error = [ 1263.1709, 1263.1709, 1263.1709, 1263.1709, 1263.1709] +25-08-31 05:47:39 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:47:39 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:47:39 | D | - sum error = [ 1403.8800, 1349.5348, 1388.3605, 1367.7952, 1364.9172] +25-08-31 05:47:39 | D | - best error = [ 1263.1709, 1263.1709, 1263.1709, 1263.1709, 1263.1709] +25-08-31 05:47:39 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:47:39 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:47:39 | D | - sum error = [ 1324.1893, 1357.0273, 1368.8547, 1385.3720] +25-08-31 05:47:39 | D | - best error = [ 1263.1709, 1263.1709, 1263.1709, 1263.1709] +25-08-31 05:47:39 | D | + error = 1263.1709 +25-08-31 05:47:39 | D | + scale = [min=0.4760, max=5.9582] +25-08-31 05:47:39 | D | - transformer_blocks.17.attn.out_proj + transformer_blocks.17.attn.add_out_proj +25-08-31 05:47:39 | D | + w: sfp4_e2m1_all +25-08-31 05:47:39 | D | + x: sfp4_e2m1_all +25-08-31 05:47:39 | D | + y: None +25-08-31 05:47:39 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:47:39 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:47:40 | D | + x - AbsMax +25-08-31 05:47:40 | D | + x = [min=4.0938, max=20.6250] +25-08-31 05:47:40 | D | + w - AbsMax +25-08-31 05:47:40 | D | + w = [min=0.1270, max=0.3477] +25-08-31 05:47:40 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:47:41 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:49:36 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:49:36 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:49:36 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:49:36 | D | - sum error = [10770.8822, 10786.2702, 10776.4944, 10799.8604, 10796.9444] +25-08-31 05:49:36 | D | - best error = [10770.8822, 10770.8822, 10770.8822, 10770.8822, 10770.8822] +25-08-31 05:49:36 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:49:36 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:49:36 | D | - sum error = [10811.9423, 10776.7494, 10790.1867, 10831.2034, 10828.7732] +25-08-31 05:49:36 | D | - best error = [10770.8822, 10770.8822, 10770.8822, 10770.8822, 10770.8822] +25-08-31 05:49:36 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:49:36 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:49:36 | D | - sum error = [10834.1327, 10835.3005, 10846.5253, 10883.9956, 10885.8686] +25-08-31 05:49:36 | D | - best error = [10770.8822, 10770.8822, 10770.8822, 10770.8822, 10770.8822] +25-08-31 05:49:36 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:49:36 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:49:36 | D | - sum error = [10869.9851, 10885.9682, 10904.3668, 10923.7613, 10913.0586] +25-08-31 05:49:36 | D | - best error = [10770.8822, 10770.8822, 10770.8822, 10770.8822, 10770.8822] +25-08-31 05:49:36 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:49:36 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:49:36 | D | - sum error = [10872.8337, 10873.3222, 10847.4587, 10837.4547, 10809.3689] +25-08-31 05:49:36 | D | - best error = [10770.8822, 10770.8822, 10770.8822, 10770.8822, 10770.8822] +25-08-31 05:49:36 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:49:36 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:49:36 | D | - sum error = [10812.0821, 10804.2273, 10807.9423, 10820.3856, 10795.8548] +25-08-31 05:49:36 | D | - best error = [10770.8822, 10770.8822, 10770.8822, 10770.8822, 10770.8822] +25-08-31 05:49:36 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:49:36 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:49:36 | D | - sum error = [10818.8487, 10834.9837, 10830.4157, 10875.0327, 10846.4785] +25-08-31 05:49:36 | D | - best error = [10770.8822, 10770.8822, 10770.8822, 10770.8822, 10770.8822] +25-08-31 05:49:36 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:49:36 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:49:36 | D | - sum error = [10845.4746, 10866.9919, 10881.4808, 10907.6549] +25-08-31 05:49:36 | D | - best error = [10770.8822, 10770.8822, 10770.8822, 10770.8822] +25-08-31 05:49:36 | D | + error = 10770.8822 +25-08-31 05:49:36 | D | + scale = [min=1.0000, max=1.0000] +25-08-31 05:49:36 | D | - transformer_blocks.17.ff.up_proj +25-08-31 05:49:36 | D | + w: sfp4_e2m1_all +25-08-31 05:49:36 | D | + x: sfp4_e2m1_all +25-08-31 05:49:36 | D | + y: None +25-08-31 05:49:36 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:49:36 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:49:37 | D | + x - AbsMax +25-08-31 05:49:37 | D | + x = [min=0.1680, max=10.6875] +25-08-31 05:49:37 | D | + w - AbsMax +25-08-31 05:49:37 | D | + w = [min=0.0918, max=0.5234] +25-08-31 05:49:37 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:49:38 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:50:57 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:50:57 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:50:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:50:57 | D | - sum error = [ 7289.0822, 7249.9881, 7207.9087, 7175.9649, 7147.5862] +25-08-31 05:50:57 | D | - best error = [ 7289.0822, 7249.9881, 7207.9087, 7175.9649, 7147.5862] +25-08-31 05:50:57 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:50:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:50:57 | D | - sum error = [ 7118.0211, 7101.9465, 7081.6122, 7068.3155, 7051.5896] +25-08-31 05:50:57 | D | - best error = [ 7118.0211, 7101.9465, 7081.6122, 7068.3155, 7051.5896] +25-08-31 05:50:57 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:50:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:50:57 | D | - sum error = [ 7034.5644, 7045.6105, 7040.2335, 7036.9118, 7034.8356] +25-08-31 05:50:57 | D | - best error = [ 7034.5644, 7034.5644, 7034.5644, 7034.5644, 7034.5644] +25-08-31 05:50:57 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:50:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:50:57 | D | - sum error = [ 7053.0249, 7047.1739, 7054.8752, 7059.8909, 7071.8756] +25-08-31 05:50:57 | D | - best error = [ 7034.5644, 7034.5644, 7034.5644, 7034.5644, 7034.5644] +25-08-31 05:50:57 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:50:57 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:50:57 | D | - sum error = [ 7355.1745, 7303.9222, 7263.5380, 7222.4608, 7180.8498] +25-08-31 05:50:57 | D | - best error = [ 7034.5644, 7034.5644, 7034.5644, 7034.5644, 7034.5644] +25-08-31 05:50:57 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:50:57 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:50:57 | D | - sum error = [ 7144.1932, 7134.0544, 7093.4048, 7083.1304, 7061.8102] +25-08-31 05:50:57 | D | - best error = [ 7034.5644, 7034.5644, 7034.5644, 7034.5644, 7034.5644] +25-08-31 05:50:57 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:50:57 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:50:57 | D | - sum error = [ 7052.6231, 7045.6279, 7053.9147, 7038.9738, 7039.3676] +25-08-31 05:50:57 | D | - best error = [ 7034.5644, 7034.5644, 7034.5644, 7034.5644, 7034.5644] +25-08-31 05:50:57 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:50:57 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:50:57 | D | - sum error = [ 7043.2836, 7045.2909, 7057.1569, 7071.8512] +25-08-31 05:50:57 | D | - best error = [ 7034.5644, 7034.5644, 7034.5644, 7034.5644] +25-08-31 05:50:57 | D | + error = 7034.5644 +25-08-31 05:50:57 | D | + scale = [min=0.4098, max=3.2692] +25-08-31 05:50:58 | D | - transformer_blocks.17.ff.down_proj +25-08-31 05:50:58 | D | + w: sfp4_e2m1_all +25-08-31 05:50:58 | D | + x: sfp4_e2m1_all +25-08-31 05:50:58 | D | + y: None +25-08-31 05:50:58 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:50:58 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:51:00 | D | + x - AbsMax +25-08-31 05:51:00 | D | + x = [min=0.0311, max=69.0000] +25-08-31 05:51:00 | D | + w - AbsMax +25-08-31 05:51:00 | D | + w = [min=0.1113, max=1.1562] +25-08-31 05:51:00 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:51:04 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:53:03 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:53:03 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:53:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:53:03 | D | - sum error = [ 6290.9802, 6292.7768, 6280.0099, 6265.1065, 6266.1736] +25-08-31 05:53:03 | D | - best error = [ 6290.9802, 6290.9802, 6280.0099, 6265.1065, 6265.1065] +25-08-31 05:53:03 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:53:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:53:03 | D | - sum error = [ 6278.7938, 6279.0509, 6289.0508, 6299.1263, 6316.6589] +25-08-31 05:53:03 | D | - best error = [ 6265.1065, 6265.1065, 6265.1065, 6265.1065, 6265.1065] +25-08-31 05:53:03 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:53:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:53:03 | D | - sum error = [ 6336.4286, 6352.2689, 6374.1013, 6387.9853, 6407.1357] +25-08-31 05:53:03 | D | - best error = [ 6265.1065, 6265.1065, 6265.1065, 6265.1065, 6265.1065] +25-08-31 05:53:03 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:53:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:53:03 | D | - sum error = [ 6533.2571, 6471.0410, 6513.2609, 6542.7001, 6589.8652] +25-08-31 05:53:03 | D | - best error = [ 6265.1065, 6265.1065, 6265.1065, 6265.1065, 6265.1065] +25-08-31 05:53:03 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:53:03 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:53:03 | D | - sum error = [ 6756.5402, 6669.7760, 6613.7118, 6555.9689, 6504.5713] +25-08-31 05:53:03 | D | - best error = [ 6265.1065, 6265.1065, 6265.1065, 6265.1065, 6265.1065] +25-08-31 05:53:03 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:53:03 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:53:03 | D | - sum error = [ 6464.1114, 6429.8009, 6487.2481, 6368.3078, 6359.6135] +25-08-31 05:53:03 | D | - best error = [ 6265.1065, 6265.1065, 6265.1065, 6265.1065, 6265.1065] +25-08-31 05:53:03 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:53:03 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:53:03 | D | - sum error = [ 6351.5437, 6358.3164, 6380.4244, 6389.9865, 6409.8786] +25-08-31 05:53:03 | D | - best error = [ 6265.1065, 6265.1065, 6265.1065, 6265.1065, 6265.1065] +25-08-31 05:53:03 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:53:03 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:53:03 | D | - sum error = [ 6434.9480, 6480.0660, 6526.2234, 6576.5541] +25-08-31 05:53:03 | D | - best error = [ 6265.1065, 6265.1065, 6265.1065, 6265.1065] +25-08-31 05:53:03 | D | + error = 6265.1065 +25-08-31 05:53:03 | D | + scale = [min=0.5943, max=1.8872] +25-08-31 05:53:04 | D | - transformer_blocks.17.ff_context.up_proj +25-08-31 05:53:04 | D | + w: sfp4_e2m1_all +25-08-31 05:53:04 | D | + x: sfp4_e2m1_all +25-08-31 05:53:04 | D | + y: None +25-08-31 05:53:04 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:53:04 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:53:04 | D | + x - AbsMax +25-08-31 05:53:04 | D | + x = [min=0.2363, max=44.7500] +25-08-31 05:53:04 | D | + w - AbsMax +25-08-31 05:53:04 | D | + w = [min=0.1074, max=0.6328] +25-08-31 05:53:04 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:53:04 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:53:48 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:53:48 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:53:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:53:48 | D | - sum error = [ 6191.0642, 6098.6923, 5966.4117, 5847.6433, 5776.2150] +25-08-31 05:53:48 | D | - best error = [ 6191.0642, 6098.6923, 5966.4117, 5847.6433, 5776.2150] +25-08-31 05:53:48 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:53:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:53:48 | D | - sum error = [ 5696.7357, 5629.0392, 5549.1293, 5514.7301, 5498.6835] +25-08-31 05:53:48 | D | - best error = [ 5696.7357, 5629.0392, 5549.1293, 5514.7301, 5498.6835] +25-08-31 05:53:48 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:53:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:53:48 | D | - sum error = [ 5466.5046, 5419.8443, 5419.9393, 5414.5356, 5398.6458] +25-08-31 05:53:48 | D | - best error = [ 5466.5046, 5419.8443, 5419.8443, 5414.5356, 5398.6458] +25-08-31 05:53:48 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:53:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:53:48 | D | - sum error = [ 5421.4349, 5420.8147, 5419.8707, 5421.0528, 5475.1815] +25-08-31 05:53:48 | D | - best error = [ 5398.6458, 5398.6458, 5398.6458, 5398.6458, 5398.6458] +25-08-31 05:53:48 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:53:48 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:53:48 | D | - sum error = [ 5875.6280, 5808.9490, 5690.5365, 5599.2253, 5568.3044] +25-08-31 05:53:48 | D | - best error = [ 5398.6458, 5398.6458, 5398.6458, 5398.6458, 5398.6458] +25-08-31 05:53:48 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:53:48 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:53:48 | D | - sum error = [ 5509.7996, 5471.1169, 5441.6369, 5413.3858, 5383.0534] +25-08-31 05:53:48 | D | - best error = [ 5398.6458, 5398.6458, 5398.6458, 5398.6458, 5383.0534] +25-08-31 05:53:48 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:53:48 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:53:48 | D | - sum error = [ 5362.5091, 5346.6756, 5338.1585, 5370.1996, 5345.4328] +25-08-31 05:53:48 | D | - best error = [ 5362.5091, 5346.6756, 5338.1585, 5338.1585, 5338.1585] +25-08-31 05:53:48 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:53:48 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:53:48 | D | - sum error = [ 5398.7800, 5417.0682, 5438.1008, 5484.8010] +25-08-31 05:53:48 | D | - best error = [ 5338.1585, 5338.1585, 5338.1585, 5338.1585] +25-08-31 05:53:48 | D | + error = 5338.1585 +25-08-31 05:53:48 | D | + scale = [min=0.6697, max=23.4499] +25-08-31 05:53:48 | D | - transformer_blocks.17.ff_context.down_proj +25-08-31 05:53:48 | D | + w: sfp4_e2m1_all +25-08-31 05:53:48 | D | + x: sfp4_e2m1_all +25-08-31 05:53:48 | D | + y: None +25-08-31 05:53:48 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:53:48 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-31 05:53:48 | D | + x - AbsMax +25-08-31 05:53:48 | D | + x = [min=0.0000, max=69.0000] +25-08-31 05:53:48 | D | + w - AbsMax +25-08-31 05:53:48 | D | + w = [min=0.0391, max=0.7500] +25-08-31 05:53:48 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-31 05:53:49 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-31 05:54:38 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:54:38 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:54:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:54:38 | D | - sum error = [ 2001.3876, 1950.4194, 1894.5497, 1860.6751, 1842.2245] +25-08-31 05:54:38 | D | - best error = [ 2001.3876, 1950.4194, 1894.5497, 1860.6751, 1842.2245] +25-08-31 05:54:38 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:54:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:54:38 | D | - sum error = [ 1802.8167, 1797.3564, 1769.2148, 1754.9784, 1743.2833] +25-08-31 05:54:38 | D | - best error = [ 1802.8167, 1797.3564, 1769.2148, 1754.9784, 1743.2833] +25-08-31 05:54:38 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:54:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:54:38 | D | - sum error = [ 1831.4943, 1735.2150, 1736.8647, 1741.5430, 1751.5555] +25-08-31 05:54:38 | D | - best error = [ 1743.2833, 1735.2150, 1735.2150, 1735.2150, 1735.2150] +25-08-31 05:54:38 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:54:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:54:38 | D | - sum error = [ 1760.2457, 1767.3241, 1794.5296, 1816.0434, 1843.7044] +25-08-31 05:54:38 | D | - best error = [ 1735.2150, 1735.2150, 1735.2150, 1735.2150, 1735.2150] +25-08-31 05:54:38 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:54:38 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:54:38 | D | - sum error = [ 2077.1887, 2016.6024, 1972.2326, 1948.9232, 1927.8956] +25-08-31 05:54:38 | D | - best error = [ 1735.2150, 1735.2150, 1735.2150, 1735.2150, 1735.2150] +25-08-31 05:54:38 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:54:38 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:54:38 | D | - sum error = [ 1883.4448, 1863.6296, 1825.5337, 1824.8408, 1809.9807] +25-08-31 05:54:38 | D | - best error = [ 1735.2150, 1735.2150, 1735.2150, 1735.2150, 1735.2150] +25-08-31 05:54:38 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:54:38 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:54:38 | D | - sum error = [ 1802.1669, 1795.8195, 1806.4806, 1796.7661, 1802.4914] +25-08-31 05:54:38 | D | - best error = [ 1735.2150, 1735.2150, 1735.2150, 1735.2150, 1735.2150] +25-08-31 05:54:38 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:54:38 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:54:38 | D | - sum error = [ 1812.3581, 1816.4555, 1835.2824, 1854.9825] +25-08-31 05:54:38 | D | - best error = [ 1735.2150, 1735.2150, 1735.2150, 1735.2150] +25-08-31 05:54:38 | D | + error = 1735.2150 +25-08-31 05:54:38 | D | + scale = [min=0.0002, max=10.2652] +25-08-31 05:54:57 | D | - Smoothing Diffusion Block transformer_blocks.18 +25-08-31 05:54:57 | D | - Skipping Module transformer_blocks.18.norm1.linear +25-08-31 05:54:57 | D | - Skipping Module transformer_blocks.18.norm1_context.linear +25-08-31 05:54:57 | D | - Smoothing Transformer Block transformer_blocks.18 +25-08-31 05:54:57 | D | - transformer_blocks.18.attn.qkv_proj +25-08-31 05:54:57 | D | + w: sfp4_e2m1_all +25-08-31 05:54:57 | D | + x: sfp4_e2m1_all +25-08-31 05:54:57 | D | + y: None +25-08-31 05:54:57 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:54:57 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 05:54:57 | D | + x - AbsMax +25-08-31 05:54:57 | D | + x = [min=0.1777, max=16.1250] +25-08-31 05:54:57 | D | + w - AbsMax +25-08-31 05:54:57 | D | + w = [min=0.1094, max=0.9492] +25-08-31 05:54:57 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 05:54:58 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 05:56:14 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:56:14 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:56:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:56:14 | D | - sum error = [ 8214.3250, 8227.6874, 8128.5266, 8063.5833, 8031.8506] +25-08-31 05:56:14 | D | - best error = [ 8214.3250, 8214.3250, 8128.5266, 8063.5833, 8031.8506] +25-08-31 05:56:14 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:56:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:56:14 | D | - sum error = [ 7960.3028, 7936.5542, 7957.5489, 7908.3123, 7933.8319] +25-08-31 05:56:14 | D | - best error = [ 7960.3028, 7936.5542, 7936.5542, 7908.3123, 7908.3123] +25-08-31 05:56:14 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:56:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:56:14 | D | - sum error = [ 7969.6311, 7990.7807, 7901.7722, 7873.1597, 7898.6963] +25-08-31 05:56:14 | D | - best error = [ 7908.3123, 7908.3123, 7901.7722, 7873.1597, 7873.1597] +25-08-31 05:56:14 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:56:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:56:14 | D | - sum error = [ 7867.0805, 7896.6720, 7958.0802, 7966.1634, 7937.3444] +25-08-31 05:56:14 | D | - best error = [ 7867.0805, 7867.0805, 7867.0805, 7867.0805, 7867.0805] +25-08-31 05:56:14 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:56:14 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:56:14 | D | - sum error = [13229.0662, 12079.9417, 11331.1006, 10744.1567, 10275.3055] +25-08-31 05:56:14 | D | - best error = [ 7867.0805, 7867.0805, 7867.0805, 7867.0805, 7867.0805] +25-08-31 05:56:14 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:56:14 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:56:14 | D | - sum error = [ 9706.5833, 9378.8387, 9009.5714, 8810.5423, 8666.7932] +25-08-31 05:56:14 | D | - best error = [ 7867.0805, 7867.0805, 7867.0805, 7867.0805, 7867.0805] +25-08-31 05:56:14 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:56:14 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:56:14 | D | - sum error = [ 8529.5941, 8375.3152, 8147.6225, 8119.4010, 8044.8023] +25-08-31 05:56:14 | D | - best error = [ 7867.0805, 7867.0805, 7867.0805, 7867.0805, 7867.0805] +25-08-31 05:56:14 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:56:14 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:56:14 | D | - sum error = [ 8031.5625, 8013.4120, 7972.7902, 7931.3368] +25-08-31 05:56:14 | D | - best error = [ 7867.0805, 7867.0805, 7867.0805, 7867.0805] +25-08-31 05:56:14 | D | + error = 7867.0805 +25-08-31 05:56:14 | D | + scale = [min=0.2737, max=8.0468] +25-08-31 05:56:14 | D | - transformer_blocks.18.attn add_qkv_proj +25-08-31 05:56:14 | D | + w: sfp4_e2m1_all +25-08-31 05:56:14 | D | + x: sfp4_e2m1_all +25-08-31 05:56:14 | D | + y: None +25-08-31 05:56:14 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:56:14 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 05:56:14 | D | + x - AbsMax +25-08-31 05:56:14 | D | + x = [min=0.2109, max=26.7500] +25-08-31 05:56:14 | D | + w - AbsMax +25-08-31 05:56:14 | D | + w = [min=0.1177, max=0.4453] +25-08-31 05:56:14 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 05:56:15 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 05:57:19 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:57:19 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:57:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:57:19 | D | - sum error = [ 1901.5076, 1879.1519, 1818.6077, 1809.1708, 1817.3120] +25-08-31 05:57:19 | D | - best error = [ 1901.5076, 1879.1519, 1818.6077, 1809.1708, 1809.1708] +25-08-31 05:57:19 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:57:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:57:19 | D | - sum error = [ 1781.4721, 1775.1141, 1797.5325, 1798.9086, 1774.2683] +25-08-31 05:57:19 | D | - best error = [ 1781.4721, 1775.1141, 1775.1141, 1775.1141, 1774.2683] +25-08-31 05:57:19 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:57:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:57:19 | D | - sum error = [ 1743.9166, 1743.2898, 1687.5213, 1698.7381, 1783.2435] +25-08-31 05:57:19 | D | - best error = [ 1743.9166, 1743.2898, 1687.5213, 1687.5213, 1687.5213] +25-08-31 05:57:19 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:57:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:57:19 | D | - sum error = [ 1743.3718, 1798.3116, 1730.0729, 1779.7255, 1757.8760] +25-08-31 05:57:19 | D | - best error = [ 1687.5213, 1687.5213, 1687.5213, 1687.5213, 1687.5213] +25-08-31 05:57:19 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:57:19 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:57:19 | D | - sum error = [ 2126.7334, 2089.6231, 2020.2542, 2011.8167, 1949.2436] +25-08-31 05:57:19 | D | - best error = [ 1687.5213, 1687.5213, 1687.5213, 1687.5213, 1687.5213] +25-08-31 05:57:19 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:57:19 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:57:19 | D | - sum error = [ 1941.4535, 1886.5501, 1884.1629, 1859.9292, 1816.9013] +25-08-31 05:57:19 | D | - best error = [ 1687.5213, 1687.5213, 1687.5213, 1687.5213, 1687.5213] +25-08-31 05:57:19 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:57:19 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:57:19 | D | - sum error = [ 1851.5564, 1875.3388, 2021.8780, 1819.3690, 1857.1689] +25-08-31 05:57:19 | D | - best error = [ 1687.5213, 1687.5213, 1687.5213, 1687.5213, 1687.5213] +25-08-31 05:57:19 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:57:19 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:57:19 | D | - sum error = [ 1792.9686, 1778.4583, 1742.9897, 1761.8226] +25-08-31 05:57:19 | D | - best error = [ 1687.5213, 1687.5213, 1687.5213, 1687.5213] +25-08-31 05:57:19 | D | + error = 1687.5213 +25-08-31 05:57:19 | D | + scale = [min=0.3931, max=7.1845] +25-08-31 05:57:19 | D | - transformer_blocks.18.attn.out_proj + transformer_blocks.18.attn.add_out_proj +25-08-31 05:57:19 | D | + w: sfp4_e2m1_all +25-08-31 05:57:19 | D | + x: sfp4_e2m1_all +25-08-31 05:57:19 | D | + y: None +25-08-31 05:57:19 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:57:19 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 05:57:20 | D | + x - AbsMax +25-08-31 05:57:20 | D | + x = [min=4.1562, max=25.7500] +25-08-31 05:57:20 | D | + w - AbsMax +25-08-31 05:57:20 | D | + w = [min=0.1260, max=0.4414] +25-08-31 05:57:20 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 05:57:21 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 05:59:07 | D | - x / w range = AbsMax / AbsMax +25-08-31 05:59:07 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 05:59:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:59:07 | D | - sum error = [15384.4737, 15351.6628, 15348.0151, 15331.0347, 15342.5432] +25-08-31 05:59:07 | D | - best error = [15384.4737, 15351.6628, 15348.0151, 15331.0347, 15331.0347] +25-08-31 05:59:07 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 05:59:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:59:07 | D | - sum error = [15358.8322, 15327.3053, 15354.9247, 15325.3745, 15337.1705] +25-08-31 05:59:07 | D | - best error = [15331.0347, 15327.3053, 15327.3053, 15325.3745, 15325.3745] +25-08-31 05:59:07 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 05:59:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:59:07 | D | - sum error = [15320.1065, 15313.9048, 15323.0154, 15350.4825, 15396.4058] +25-08-31 05:59:07 | D | - best error = [15320.1065, 15313.9048, 15313.9048, 15313.9048, 15313.9048] +25-08-31 05:59:07 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:59:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 05:59:07 | D | - sum error = [15458.4387, 15457.5170, 15483.9427, 15526.8346, 15487.9065] +25-08-31 05:59:07 | D | - best error = [15313.9048, 15313.9048, 15313.9048, 15313.9048, 15313.9048] +25-08-31 05:59:07 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 05:59:07 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 05:59:07 | D | - sum error = [15541.6818, 15491.5523, 15436.2866, 15436.8017, 15444.3331] +25-08-31 05:59:07 | D | - best error = [15313.9048, 15313.9048, 15313.9048, 15313.9048, 15313.9048] +25-08-31 05:59:07 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 05:59:07 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 05:59:07 | D | - sum error = [15455.9400, 15423.5254, 15425.2230, 15401.6602, 15390.2301] +25-08-31 05:59:07 | D | - best error = [15313.9048, 15313.9048, 15313.9048, 15313.9048, 15313.9048] +25-08-31 05:59:07 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 05:59:07 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 05:59:07 | D | - sum error = [15394.3216, 15391.4345, 15387.1624, 15420.5302, 15424.0207] +25-08-31 05:59:07 | D | - best error = [15313.9048, 15313.9048, 15313.9048, 15313.9048, 15313.9048] +25-08-31 05:59:07 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 05:59:07 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 05:59:07 | D | - sum error = [15427.0282, 15482.1668, 15498.5264, 15552.6543] +25-08-31 05:59:07 | D | - best error = [15313.9048, 15313.9048, 15313.9048, 15313.9048] +25-08-31 05:59:07 | D | + error = 15313.9048 +25-08-31 05:59:07 | D | + scale = [min=2.1892, max=5.9694] +25-08-31 05:59:07 | D | - transformer_blocks.18.ff.up_proj +25-08-31 05:59:07 | D | + w: sfp4_e2m1_all +25-08-31 05:59:07 | D | + x: sfp4_e2m1_all +25-08-31 05:59:07 | D | + y: None +25-08-31 05:59:07 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 05:59:07 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 05:59:08 | D | + x - AbsMax +25-08-31 05:59:08 | D | + x = [min=0.2676, max=11.8750] +25-08-31 05:59:08 | D | + w - AbsMax +25-08-31 05:59:08 | D | + w = [min=0.1099, max=0.6992] +25-08-31 05:59:08 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 05:59:09 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:00:25 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:00:25 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:00:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:00:25 | D | - sum error = [ 7336.9893, 7285.0359, 7249.7688, 7218.1596, 7180.4785] +25-08-31 06:00:25 | D | - best error = [ 7336.9893, 7285.0359, 7249.7688, 7218.1596, 7180.4785] +25-08-31 06:00:25 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:00:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:00:25 | D | - sum error = [ 7151.6485, 7119.3842, 7108.0506, 7091.2885, 7083.6931] +25-08-31 06:00:25 | D | - best error = [ 7151.6485, 7119.3842, 7108.0506, 7091.2885, 7083.6931] +25-08-31 06:00:25 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:00:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:00:25 | D | - sum error = [ 7068.8404, 7068.7569, 7068.6434, 7070.0693, 7088.1467] +25-08-31 06:00:25 | D | - best error = [ 7068.8404, 7068.7569, 7068.6434, 7068.6434, 7068.6434] +25-08-31 06:00:25 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:00:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:00:25 | D | - sum error = [ 7081.5145, 7084.3320, 7096.5561, 7115.4381, 7130.8025] +25-08-31 06:00:25 | D | - best error = [ 7068.6434, 7068.6434, 7068.6434, 7068.6434, 7068.6434] +25-08-31 06:00:25 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:00:25 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:00:25 | D | - sum error = [ 8144.7880, 7964.7162, 7828.9197, 7676.4935, 7578.3457] +25-08-31 06:00:25 | D | - best error = [ 7068.6434, 7068.6434, 7068.6434, 7068.6434, 7068.6434] +25-08-31 06:00:25 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:00:25 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:00:25 | D | - sum error = [ 7487.7431, 7409.8955, 7340.4409, 7281.6276, 7225.3396] +25-08-31 06:00:25 | D | - best error = [ 7068.6434, 7068.6434, 7068.6434, 7068.6434, 7068.6434] +25-08-31 06:00:25 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:00:25 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:00:25 | D | - sum error = [ 7194.5663, 7158.8387, 7144.8135, 7133.6142, 7109.7994] +25-08-31 06:00:25 | D | - best error = [ 7068.6434, 7068.6434, 7068.6434, 7068.6434, 7068.6434] +25-08-31 06:00:25 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:00:25 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:00:25 | D | - sum error = [ 7103.7247, 7112.3169, 7119.0168, 7121.5240] +25-08-31 06:00:25 | D | - best error = [ 7068.6434, 7068.6434, 7068.6434, 7068.6434] +25-08-31 06:00:25 | D | + error = 7068.6434 +25-08-31 06:00:25 | D | + scale = [min=0.4534, max=4.4135] +25-08-31 06:00:25 | D | - transformer_blocks.18.ff.down_proj +25-08-31 06:00:25 | D | + w: sfp4_e2m1_all +25-08-31 06:00:25 | D | + x: sfp4_e2m1_all +25-08-31 06:00:25 | D | + y: None +25-08-31 06:00:25 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:00:25 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:00:28 | D | + x - AbsMax +25-08-31 06:00:28 | D | + x = [min=1.1250, max=201.0000] +25-08-31 06:00:28 | D | + w - AbsMax +25-08-31 06:00:28 | D | + w = [min=0.1113, max=1.4609] +25-08-31 06:00:28 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:00:30 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:02:24 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:02:24 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:02:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:02:24 | D | - sum error = [ 7897.1706, 7778.3165, 7714.2554, 7638.2367, 7606.4630] +25-08-31 06:02:24 | D | - best error = [ 7897.1706, 7778.3165, 7714.2554, 7638.2367, 7606.4630] +25-08-31 06:02:24 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:02:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:02:24 | D | - sum error = [ 7537.4293, 7512.5538, 7482.7169, 7455.5493, 7454.9860] +25-08-31 06:02:24 | D | - best error = [ 7537.4293, 7512.5538, 7482.7169, 7455.5493, 7454.9860] +25-08-31 06:02:24 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:02:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:02:24 | D | - sum error = [ 7456.0346, 7467.7836, 7459.3040, 7474.6599, 7490.0980] +25-08-31 06:02:24 | D | - best error = [ 7454.9860, 7454.9860, 7454.9860, 7454.9860, 7454.9860] +25-08-31 06:02:24 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:02:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:02:24 | D | - sum error = [ 7525.3045, 7548.8855, 7585.8265, 7685.3934, 7690.3251] +25-08-31 06:02:24 | D | - best error = [ 7454.9860, 7454.9860, 7454.9860, 7454.9860, 7454.9860] +25-08-31 06:02:24 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:02:24 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:02:24 | D | - sum error = [12069.8759, 10484.9317, 9521.5255, 8820.3986, 8426.0980] +25-08-31 06:02:24 | D | - best error = [ 7454.9860, 7454.9860, 7454.9860, 7454.9860, 7454.9860] +25-08-31 06:02:24 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:02:24 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:02:24 | D | - sum error = [ 8169.3757, 7994.0996, 7855.5585, 7696.9838, 7584.6250] +25-08-31 06:02:24 | D | - best error = [ 7454.9860, 7454.9860, 7454.9860, 7454.9860, 7454.9860] +25-08-31 06:02:24 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:02:24 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:02:24 | D | - sum error = [ 7526.7520, 7493.9610, 7470.1004, 7459.6509, 7475.2442] +25-08-31 06:02:24 | D | - best error = [ 7454.9860, 7454.9860, 7454.9860, 7454.9860, 7454.9860] +25-08-31 06:02:24 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:02:24 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:02:24 | D | - sum error = [ 7478.4766, 7527.5072, 7604.3493, 7640.5775] +25-08-31 06:02:24 | D | - best error = [ 7454.9860, 7454.9860, 7454.9860, 7454.9860] +25-08-31 06:02:24 | D | + error = 7454.9860 +25-08-31 06:02:24 | D | + scale = [min=1.0544, max=10.8752] +25-08-31 06:02:24 | D | - transformer_blocks.18.ff_context.up_proj +25-08-31 06:02:24 | D | + w: sfp4_e2m1_all +25-08-31 06:02:24 | D | + x: sfp4_e2m1_all +25-08-31 06:02:24 | D | + y: None +25-08-31 06:02:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:02:24 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:02:24 | D | + x - AbsMax +25-08-31 06:02:24 | D | + x = [min=0.4258, max=122.0000] +25-08-31 06:02:24 | D | + w - AbsMax +25-08-31 06:02:24 | D | + w = [min=0.1406, max=0.9375] +25-08-31 06:02:24 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:02:25 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:03:08 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:03:08 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:03:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:03:08 | D | - sum error = [10133.1527, 9573.0961, 9104.5589, 8670.6729, 8264.4642] +25-08-31 06:03:08 | D | - best error = [10133.1527, 9573.0961, 9104.5589, 8670.6729, 8264.4642] +25-08-31 06:03:08 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:03:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:03:08 | D | - sum error = [ 7877.6773, 7537.3133, 7277.2286, 6990.2990, 6712.1901] +25-08-31 06:03:08 | D | - best error = [ 7877.6773, 7537.3133, 7277.2286, 6990.2990, 6712.1901] +25-08-31 06:03:08 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:03:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:03:08 | D | - sum error = [ 6526.7977, 6342.0779, 6194.8462, 6110.3339, 6007.9141] +25-08-31 06:03:08 | D | - best error = [ 6526.7977, 6342.0779, 6194.8462, 6110.3339, 6007.9141] +25-08-31 06:03:08 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:03:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:03:08 | D | - sum error = [ 5976.4722, 5952.5225, 5957.4135, 6007.4787, 6066.7876] +25-08-31 06:03:08 | D | - best error = [ 5976.4722, 5952.5225, 5952.5225, 5952.5225, 5952.5225] +25-08-31 06:03:08 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:03:08 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:03:08 | D | - sum error = [11434.6421, 10666.8068, 9992.4387, 9330.0710, 8834.3570] +25-08-31 06:03:08 | D | - best error = [ 5952.5225, 5952.5225, 5952.5225, 5952.5225, 5952.5225] +25-08-31 06:03:08 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:03:08 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:03:08 | D | - sum error = [ 8322.9408, 7916.3555, 7529.4127, 7243.6626, 6876.3659] +25-08-31 06:03:08 | D | - best error = [ 5952.5225, 5952.5225, 5952.5225, 5952.5225, 5952.5225] +25-08-31 06:03:08 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:03:08 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:03:08 | D | - sum error = [ 6668.3872, 6448.7041, 6250.1366, 6131.2997, 6046.6407] +25-08-31 06:03:08 | D | - best error = [ 5952.5225, 5952.5225, 5952.5225, 5952.5225, 5952.5225] +25-08-31 06:03:08 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:03:08 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:03:08 | D | - sum error = [ 5990.2447, 5964.2384, 5998.3304, 6063.6860] +25-08-31 06:03:08 | D | - best error = [ 5952.5225, 5952.5225, 5952.5225, 5952.5225] +25-08-31 06:03:08 | D | + error = 5952.5225 +25-08-31 06:03:08 | D | + scale = [min=0.5051, max=46.6754] +25-08-31 06:03:08 | D | - transformer_blocks.18.ff_context.down_proj +25-08-31 06:03:08 | D | + w: sfp4_e2m1_all +25-08-31 06:03:08 | D | + x: sfp4_e2m1_all +25-08-31 06:03:08 | D | + y: None +25-08-31 06:03:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:03:08 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:03:08 | D | + x - AbsMax +25-08-31 06:03:08 | D | + x = [min=0.0000, max=217.0000] +25-08-31 06:03:08 | D | + w - AbsMax +25-08-31 06:03:08 | D | + w = [min=0.0255, max=0.8125] +25-08-31 06:03:08 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:03:09 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:03:57 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:03:57 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:03:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:03:57 | D | - sum error = [ 6345.5635, 6169.6849, 5834.7024, 5674.8195, 5515.4430] +25-08-31 06:03:57 | D | - best error = [ 6345.5635, 6169.6849, 5834.7024, 5674.8195, 5515.4430] +25-08-31 06:03:57 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:03:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:03:57 | D | - sum error = [ 5351.3251, 5263.3959, 5180.7222, 5136.9908, 5028.0885] +25-08-31 06:03:57 | D | - best error = [ 5351.3251, 5263.3959, 5180.7222, 5136.9908, 5028.0885] +25-08-31 06:03:57 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:03:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:03:57 | D | - sum error = [ 5017.9789, 4978.4381, 4847.9703, 5520.2245, 4884.4117] +25-08-31 06:03:57 | D | - best error = [ 5017.9789, 4978.4381, 4847.9703, 4847.9703, 4847.9703] +25-08-31 06:03:57 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:03:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:03:57 | D | - sum error = [ 4864.0506, 4825.1400, 4847.0059, 4859.2658, 4912.1737] +25-08-31 06:03:57 | D | - best error = [ 4847.9703, 4825.1400, 4825.1400, 4825.1400, 4825.1400] +25-08-31 06:03:57 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:03:57 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:03:57 | D | - sum error = [11634.0597, 9958.5143, 9014.5709, 8335.9535, 7482.3670] +25-08-31 06:03:57 | D | - best error = [ 4825.1400, 4825.1400, 4825.1400, 4825.1400, 4825.1400] +25-08-31 06:03:57 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:03:57 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:03:57 | D | - sum error = [ 6950.3370, 6462.0904, 6079.4891, 5880.5816, 5594.2765] +25-08-31 06:03:57 | D | - best error = [ 4825.1400, 4825.1400, 4825.1400, 4825.1400, 4825.1400] +25-08-31 06:03:57 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:03:57 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:03:57 | D | - sum error = [ 5480.0276, 5295.5613, 5237.3173, 5083.6231, 5018.9077] +25-08-31 06:03:57 | D | - best error = [ 4825.1400, 4825.1400, 4825.1400, 4825.1400, 4825.1400] +25-08-31 06:03:57 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:03:57 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:03:57 | D | - sum error = [ 4974.7127, 4917.5892, 4882.8103, 4947.7860] +25-08-31 06:03:57 | D | - best error = [ 4825.1400, 4825.1400, 4825.1400, 4825.1400] +25-08-31 06:03:57 | D | + error = 4825.1400 +25-08-31 06:03:57 | D | + scale = [min=0.0000, max=73.9891] +25-08-31 06:04:16 | D | - Smoothing Diffusion Block single_transformer_blocks.0 +25-08-31 06:04:16 | D | - Skipping Module single_transformer_blocks.0.norm.linear +25-08-31 06:04:16 | D | - Smoothing Transformer Block single_transformer_blocks.0 +25-08-31 06:04:16 | D | - single_transformer_blocks.0.attn.qkv_proj + single_transformer_blocks.0.up_proj +25-08-31 06:04:16 | D | + w: sfp4_e2m1_all +25-08-31 06:04:16 | D | + x: sfp4_e2m1_all +25-08-31 06:04:16 | D | + y: None +25-08-31 06:04:16 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:04:16 | D | + finished parsing calibration arguments, ram usage: 15.4 +25-08-31 06:04:16 | D | + x - AbsMax +25-08-31 06:04:16 | D | + x = [min=0.2266, max=45.7500] +25-08-31 06:04:16 | D | + w - AbsMax +25-08-31 06:04:16 | D | + w = [min=0.1152, max=0.5391] +25-08-31 06:04:16 | D | + finished resetting calibrator, ram usage: 15.4 +25-08-31 06:04:18 | D | + finished calculating the original outputs, ram usage: 15.4 +25-08-31 06:06:25 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:06:25 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:06:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:06:25 | D | - sum error = [13095.8448, 12942.4004, 12899.4239, 12831.2406, 12775.5379] +25-08-31 06:06:25 | D | - best error = [13095.8448, 12942.4004, 12899.4239, 12831.2406, 12775.5379] +25-08-31 06:06:25 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:06:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:06:25 | D | - sum error = [12742.2309, 12686.8411, 12600.3708, 12581.7226, 12660.6919] +25-08-31 06:06:25 | D | - best error = [12742.2309, 12686.8411, 12600.3708, 12581.7226, 12581.7226] +25-08-31 06:06:25 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:06:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:06:25 | D | - sum error = [12579.7291, 12637.3991, 12597.8163, 12611.3593, 12617.1109] +25-08-31 06:06:25 | D | - best error = [12579.7291, 12579.7291, 12579.7291, 12579.7291, 12579.7291] +25-08-31 06:06:25 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:06:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:06:25 | D | - sum error = [12675.8290, 12737.3774, 12795.6343, 12970.8880, 13040.6448] +25-08-31 06:06:25 | D | - best error = [12579.7291, 12579.7291, 12579.7291, 12579.7291, 12579.7291] +25-08-31 06:06:25 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:06:25 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:06:25 | D | - sum error = [14714.9709, 14312.9687, 13927.9047, 13735.3628, 13458.1651] +25-08-31 06:06:25 | D | - best error = [12579.7291, 12579.7291, 12579.7291, 12579.7291, 12579.7291] +25-08-31 06:06:25 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:06:25 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:06:25 | D | - sum error = [13317.0351, 13253.1192, 13205.7896, 13028.1744, 13010.2390] +25-08-31 06:06:25 | D | - best error = [12579.7291, 12579.7291, 12579.7291, 12579.7291, 12579.7291] +25-08-31 06:06:25 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:06:25 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:06:25 | D | - sum error = [12865.7051, 12837.0881, 12927.1287, 12810.1079, 12868.8132] +25-08-31 06:06:25 | D | - best error = [12579.7291, 12579.7291, 12579.7291, 12579.7291, 12579.7291] +25-08-31 06:06:25 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:06:25 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:06:25 | D | - sum error = [12927.8144, 12952.2446, 12986.4064, 13068.3300] +25-08-31 06:06:25 | D | - best error = [12579.7291, 12579.7291, 12579.7291, 12579.7291] +25-08-31 06:06:25 | D | + error = 12579.7291 +25-08-31 06:06:25 | D | + scale = [min=0.4760, max=6.7639] +25-08-31 06:06:26 | D | - single_transformer_blocks.0.attn.out_proj +25-08-31 06:06:26 | D | + w: sfp4_e2m1_all +25-08-31 06:06:26 | D | + x: sfp4_e2m1_all +25-08-31 06:06:26 | D | + y: None +25-08-31 06:06:26 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:06:26 | D | + finished parsing calibration arguments, ram usage: 15.4 +25-08-31 06:06:26 | D | + x - AbsMax +25-08-31 06:06:26 | D | + x = [min=1.6562, max=14.5000] +25-08-31 06:06:26 | D | + w - AbsMax +25-08-31 06:06:26 | D | + w = [min=0.1201, max=0.3164] +25-08-31 06:06:26 | D | + finished resetting calibrator, ram usage: 15.4 +25-08-31 06:06:27 | D | + finished calculating the original outputs, ram usage: 15.4 +25-08-31 06:07:27 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:07:27 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:07:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:07:27 | D | - sum error = [ 7160.4902, 7148.2306, 7136.8615, 7147.7261, 7149.6593] +25-08-31 06:07:27 | D | - best error = [ 7160.4902, 7148.2306, 7136.8615, 7136.8615, 7136.8615] +25-08-31 06:07:27 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:07:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:07:27 | D | - sum error = [ 7149.1250, 7151.5014, 7177.7935, 7175.7313, 7182.8805] +25-08-31 06:07:27 | D | - best error = [ 7136.8615, 7136.8615, 7136.8615, 7136.8615, 7136.8615] +25-08-31 06:07:27 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:07:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:07:27 | D | - sum error = [ 7207.6176, 7206.8123, 7236.2756, 7248.9635, 7254.4976] +25-08-31 06:07:27 | D | - best error = [ 7136.8615, 7136.8615, 7136.8615, 7136.8615, 7136.8615] +25-08-31 06:07:27 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:07:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:07:27 | D | - sum error = [ 7256.3955, 7273.9937, 7304.4543, 7318.6064, 7341.6200] +25-08-31 06:07:27 | D | - best error = [ 7136.8615, 7136.8615, 7136.8615, 7136.8615, 7136.8615] +25-08-31 06:07:27 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:07:27 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:07:27 | D | - sum error = [ 7197.0422, 7180.2752, 7184.5545, 7187.0436, 7188.0100] +25-08-31 06:07:27 | D | - best error = [ 7136.8615, 7136.8615, 7136.8615, 7136.8615, 7136.8615] +25-08-31 06:07:27 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:07:27 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:07:27 | D | - sum error = [ 7189.9346, 7184.5760, 7189.8904, 7194.7395, 7220.2803] +25-08-31 06:07:27 | D | - best error = [ 7136.8615, 7136.8615, 7136.8615, 7136.8615, 7136.8615] +25-08-31 06:07:27 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:07:27 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:07:27 | D | - sum error = [ 7230.0874, 7236.9822, 7240.6897, 7254.8662, 7270.5403] +25-08-31 06:07:27 | D | - best error = [ 7136.8615, 7136.8615, 7136.8615, 7136.8615, 7136.8615] +25-08-31 06:07:27 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:07:27 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:07:27 | D | - sum error = [ 7279.8956, 7297.5023, 7326.8503, 7351.7459] +25-08-31 06:07:27 | D | - best error = [ 7136.8615, 7136.8615, 7136.8615, 7136.8615] +25-08-31 06:07:27 | D | + error = 7136.8615 +25-08-31 06:07:27 | D | + scale = [min=1.0518, max=1.3066] +25-08-31 06:07:27 | D | - single_transformer_blocks.0.down_proj +25-08-31 06:07:27 | D | + w: sfp4_e2m1_all +25-08-31 06:07:27 | D | + x: sfp4_e2m1_all +25-08-31 06:07:27 | D | + y: None +25-08-31 06:07:27 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:07:27 | D | + finished parsing calibration arguments, ram usage: 15.4 +25-08-31 06:07:30 | D | + x - AbsMax +25-08-31 06:07:30 | D | + x = [min=1.8047, max=22.7500] +25-08-31 06:07:30 | D | + w - AbsMax +25-08-31 06:07:30 | D | + w = [min=0.1147, max=1.3672] +25-08-31 06:07:30 | D | + finished resetting calibrator, ram usage: 15.4 +25-08-31 06:07:32 | D | + finished calculating the original outputs, ram usage: 15.4 +25-08-31 06:09:35 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:09:35 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:09:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:09:35 | D | - sum error = [ 6939.2325, 6938.8561, 6932.3422, 6931.6745, 6945.1183] +25-08-31 06:09:35 | D | - best error = [ 6939.2325, 6938.8561, 6932.3422, 6931.6745, 6931.6745] +25-08-31 06:09:35 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:09:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:09:35 | D | - sum error = [ 6932.4547, 7060.3099, 6948.4418, 6960.0391, 6972.8023] +25-08-31 06:09:35 | D | - best error = [ 6931.6745, 6931.6745, 6931.6745, 6931.6745, 6931.6745] +25-08-31 06:09:35 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:09:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:09:35 | D | - sum error = [ 6972.9388, 6997.6373, 7012.3905, 7039.3110, 7049.4817] +25-08-31 06:09:35 | D | - best error = [ 6931.6745, 6931.6745, 6931.6745, 6931.6745, 6931.6745] +25-08-31 06:09:35 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:09:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:09:35 | D | - sum error = [ 7086.9373, 7120.6269, 7152.6907, 7195.3869, 7258.4536] +25-08-31 06:09:35 | D | - best error = [ 6931.6745, 6931.6745, 6931.6745, 6931.6745, 6931.6745] +25-08-31 06:09:35 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:09:35 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:09:35 | D | - sum error = [ 7310.3874, 7250.3898, 7207.4670, 7147.9379, 7124.9483] +25-08-31 06:09:35 | D | - best error = [ 6931.6745, 6931.6745, 6931.6745, 6931.6745, 6931.6745] +25-08-31 06:09:35 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:09:35 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:09:35 | D | - sum error = [ 7105.1629, 7051.4277, 7040.2128, 7030.3826, 7017.2411] +25-08-31 06:09:35 | D | - best error = [ 6931.6745, 6931.6745, 6931.6745, 6931.6745, 6931.6745] +25-08-31 06:09:35 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:09:35 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:09:35 | D | - sum error = [ 7002.9442, 6989.5971, 7005.7967, 7007.6121, 7033.4236] +25-08-31 06:09:35 | D | - best error = [ 6931.6745, 6931.6745, 6931.6745, 6931.6745, 6931.6745] +25-08-31 06:09:35 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:09:35 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:09:35 | D | - sum error = [ 7073.4198, 7112.3107, 7163.3408, 7217.0149] +25-08-31 06:09:35 | D | - best error = [ 6931.6745, 6931.6745, 6931.6745, 6931.6745] +25-08-31 06:09:35 | D | + error = 6931.6745 +25-08-31 06:09:35 | D | + scale = [min=1.0926, max=1.5979] +25-08-31 06:09:53 | D | - Smoothing Diffusion Block single_transformer_blocks.1 +25-08-31 06:09:53 | D | - Skipping Module single_transformer_blocks.1.norm.linear +25-08-31 06:09:53 | D | - Smoothing Transformer Block single_transformer_blocks.1 +25-08-31 06:09:53 | D | - single_transformer_blocks.1.attn.qkv_proj + single_transformer_blocks.1.up_proj +25-08-31 06:09:53 | D | + w: sfp4_e2m1_all +25-08-31 06:09:53 | D | + x: sfp4_e2m1_all +25-08-31 06:09:53 | D | + y: None +25-08-31 06:09:53 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:09:53 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:09:54 | D | + x - AbsMax +25-08-31 06:09:54 | D | + x = [min=0.2490, max=35.7500] +25-08-31 06:09:54 | D | + w - AbsMax +25-08-31 06:09:54 | D | + w = [min=0.0977, max=0.5117] +25-08-31 06:09:54 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:09:55 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:12:06 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:12:06 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:12:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:12:06 | D | - sum error = [14168.0311, 14165.2228, 14082.9497, 14204.8904, 13880.0340] +25-08-31 06:12:06 | D | - best error = [14168.0311, 14165.2228, 14082.9497, 14082.9497, 13880.0340] +25-08-31 06:12:06 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:12:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:12:06 | D | - sum error = [13896.5435, 13862.2521, 13799.5826, 13746.1785, 13783.2397] +25-08-31 06:12:06 | D | - best error = [13880.0340, 13862.2521, 13799.5826, 13746.1785, 13746.1785] +25-08-31 06:12:06 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:12:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:12:06 | D | - sum error = [13861.1619, 14177.5834, 14035.8917, 14038.3953, 14209.3378] +25-08-31 06:12:06 | D | - best error = [13746.1785, 13746.1785, 13746.1785, 13746.1785, 13746.1785] +25-08-31 06:12:06 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:12:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:12:06 | D | - sum error = [14227.7104, 14171.2272, 14191.9470, 14374.0004, 14516.7743] +25-08-31 06:12:06 | D | - best error = [13746.1785, 13746.1785, 13746.1785, 13746.1785, 13746.1785] +25-08-31 06:12:06 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:12:06 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:12:06 | D | - sum error = [15131.4967, 15015.0735, 14838.4522, 14614.9469, 14654.1172] +25-08-31 06:12:06 | D | - best error = [13746.1785, 13746.1785, 13746.1785, 13746.1785, 13746.1785] +25-08-31 06:12:06 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:12:06 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:12:06 | D | - sum error = [14349.0248, 14329.7788, 14445.8210, 14252.0006, 14286.7127] +25-08-31 06:12:06 | D | - best error = [13746.1785, 13746.1785, 13746.1785, 13746.1785, 13746.1785] +25-08-31 06:12:06 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:12:06 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:12:06 | D | - sum error = [14416.4495, 14133.7661, 14097.5538, 14235.5409, 14231.4694] +25-08-31 06:12:06 | D | - best error = [13746.1785, 13746.1785, 13746.1785, 13746.1785, 13746.1785] +25-08-31 06:12:06 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:12:06 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:12:06 | D | - sum error = [14195.8354, 14265.2579, 14407.6280, 14612.9198] +25-08-31 06:12:06 | D | - best error = [13746.1785, 13746.1785, 13746.1785, 13746.1785] +25-08-31 06:12:06 | D | + error = 13746.1785 +25-08-31 06:12:06 | D | + scale = [min=0.5735, max=4.1813] +25-08-31 06:12:06 | D | - single_transformer_blocks.1.attn.out_proj +25-08-31 06:12:06 | D | + w: sfp4_e2m1_all +25-08-31 06:12:06 | D | + x: sfp4_e2m1_all +25-08-31 06:12:06 | D | + y: None +25-08-31 06:12:06 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:12:06 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:12:07 | D | + x - AbsMax +25-08-31 06:12:07 | D | + x = [min=2.1250, max=19.5000] +25-08-31 06:12:07 | D | + w - AbsMax +25-08-31 06:12:07 | D | + w = [min=0.1172, max=0.5586] +25-08-31 06:12:07 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:12:07 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:13:05 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:13:05 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:13:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:13:05 | D | - sum error = [ 6137.9808, 6132.6552, 6137.8667, 6122.7673, 6132.4031] +25-08-31 06:13:05 | D | - best error = [ 6137.9808, 6132.6552, 6132.6552, 6122.7673, 6122.7673] +25-08-31 06:13:05 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:13:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:13:05 | D | - sum error = [ 6134.9204, 6142.6688, 6150.8120, 6159.8227, 6168.7399] +25-08-31 06:13:05 | D | - best error = [ 6122.7673, 6122.7673, 6122.7673, 6122.7673, 6122.7673] +25-08-31 06:13:05 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:13:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:13:05 | D | - sum error = [ 6183.4175, 6185.9425, 6190.6294, 6215.2516, 6238.6867] +25-08-31 06:13:05 | D | - best error = [ 6122.7673, 6122.7673, 6122.7673, 6122.7673, 6122.7673] +25-08-31 06:13:05 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:13:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:13:05 | D | - sum error = [ 6254.0355, 6258.8037, 6285.0671, 6326.9174, 6350.5634] +25-08-31 06:13:05 | D | - best error = [ 6122.7673, 6122.7673, 6122.7673, 6122.7673, 6122.7673] +25-08-31 06:13:05 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:13:05 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:13:05 | D | - sum error = [ 6191.1753, 6195.8802, 6209.5607, 6192.8081, 6186.5894] +25-08-31 06:13:05 | D | - best error = [ 6122.7673, 6122.7673, 6122.7673, 6122.7673, 6122.7673] +25-08-31 06:13:05 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:13:05 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:13:05 | D | - sum error = [ 6187.1645, 6190.0759, 6187.0626, 6200.3448, 6200.4548] +25-08-31 06:13:05 | D | - best error = [ 6122.7673, 6122.7673, 6122.7673, 6122.7673, 6122.7673] +25-08-31 06:13:05 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:13:05 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:13:05 | D | - sum error = [ 6215.1991, 6214.5549, 6236.9909, 6251.2098, 6256.8519] +25-08-31 06:13:05 | D | - best error = [ 6122.7673, 6122.7673, 6122.7673, 6122.7673, 6122.7673] +25-08-31 06:13:05 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:13:05 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:13:05 | D | - sum error = [ 6278.7938, 6309.3068, 6335.0788, 6355.6765] +25-08-31 06:13:05 | D | - best error = [ 6122.7673, 6122.7673, 6122.7673, 6122.7673] +25-08-31 06:13:05 | D | + error = 6122.7673 +25-08-31 06:13:05 | D | + scale = [min=1.1197, max=1.5614] +25-08-31 06:13:05 | D | - single_transformer_blocks.1.down_proj +25-08-31 06:13:05 | D | + w: sfp4_e2m1_all +25-08-31 06:13:05 | D | + x: sfp4_e2m1_all +25-08-31 06:13:05 | D | + y: None +25-08-31 06:13:05 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:13:05 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:13:08 | D | + x - AbsMax +25-08-31 06:13:08 | D | + x = [min=2.0000, max=18.2500] +25-08-31 06:13:08 | D | + w - AbsMax +25-08-31 06:13:08 | D | + w = [min=0.1138, max=1.5000] +25-08-31 06:13:08 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:13:10 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:15:05 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:15:05 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:15:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:15:05 | D | - sum error = [ 7114.8397, 7099.4966, 7106.6214, 7108.7774, 7111.6203] +25-08-31 06:15:05 | D | - best error = [ 7114.8397, 7099.4966, 7099.4966, 7099.4966, 7099.4966] +25-08-31 06:15:05 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:15:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:15:05 | D | - sum error = [ 7118.1059, 7116.3432, 7125.1218, 7128.6313, 7147.7487] +25-08-31 06:15:05 | D | - best error = [ 7099.4966, 7099.4966, 7099.4966, 7099.4966, 7099.4966] +25-08-31 06:15:05 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:15:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:15:05 | D | - sum error = [ 7182.8290, 7204.5025, 7223.3574, 7249.7098, 7274.7608] +25-08-31 06:15:05 | D | - best error = [ 7099.4966, 7099.4966, 7099.4966, 7099.4966, 7099.4966] +25-08-31 06:15:05 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:15:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:15:05 | D | - sum error = [ 7312.9874, 7348.6491, 7398.8410, 7430.7044, 7486.6893] +25-08-31 06:15:05 | D | - best error = [ 7099.4966, 7099.4966, 7099.4966, 7099.4966, 7099.4966] +25-08-31 06:15:05 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:15:05 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:15:05 | D | - sum error = [ 7385.4771, 7333.2648, 7288.1990, 7247.4575, 7221.0254] +25-08-31 06:15:05 | D | - best error = [ 7099.4966, 7099.4966, 7099.4966, 7099.4966, 7099.4966] +25-08-31 06:15:05 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:15:05 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:15:05 | D | - sum error = [ 7193.3200, 7176.3629, 7154.1954, 7136.3694, 7170.9071] +25-08-31 06:15:05 | D | - best error = [ 7099.4966, 7099.4966, 7099.4966, 7099.4966, 7099.4966] +25-08-31 06:15:05 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:15:05 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:15:05 | D | - sum error = [ 7135.1325, 7148.9770, 7174.1182, 7184.5724, 7225.3482] +25-08-31 06:15:05 | D | - best error = [ 7099.4966, 7099.4966, 7099.4966, 7099.4966, 7099.4966] +25-08-31 06:15:05 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:15:05 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:15:05 | D | - sum error = [ 7274.6346, 7332.9009, 7398.1317, 7466.3600] +25-08-31 06:15:05 | D | - best error = [ 7099.4966, 7099.4966, 7099.4966, 7099.4966] +25-08-31 06:15:05 | D | + error = 7099.4966 +25-08-31 06:15:05 | D | + scale = [min=1.0353, max=1.1563] +25-08-31 06:15:23 | D | - Smoothing Diffusion Block single_transformer_blocks.2 +25-08-31 06:15:23 | D | - Skipping Module single_transformer_blocks.2.norm.linear +25-08-31 06:15:23 | D | - Smoothing Transformer Block single_transformer_blocks.2 +25-08-31 06:15:23 | D | - single_transformer_blocks.2.attn.qkv_proj + single_transformer_blocks.2.up_proj +25-08-31 06:15:23 | D | + w: sfp4_e2m1_all +25-08-31 06:15:23 | D | + x: sfp4_e2m1_all +25-08-31 06:15:23 | D | + y: None +25-08-31 06:15:23 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:15:23 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:15:24 | D | + x - AbsMax +25-08-31 06:15:24 | D | + x = [min=0.2295, max=25.0000] +25-08-31 06:15:24 | D | + w - AbsMax +25-08-31 06:15:24 | D | + w = [min=0.0977, max=0.5234] +25-08-31 06:15:24 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:15:25 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:17:34 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:17:34 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:17:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:17:34 | D | - sum error = [16783.6721, 16612.4522, 16496.5548, 16530.4732, 16415.7242] +25-08-31 06:17:34 | D | - best error = [16783.6721, 16612.4522, 16496.5548, 16496.5548, 16415.7242] +25-08-31 06:17:34 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:17:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:17:34 | D | - sum error = [16439.7684, 16440.2511, 16567.7566, 16348.1928, 16317.6028] +25-08-31 06:17:34 | D | - best error = [16415.7242, 16415.7242, 16415.7242, 16348.1928, 16317.6028] +25-08-31 06:17:34 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:17:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:17:34 | D | - sum error = [16675.4795, 16511.3275, 16509.6306, 16284.0929, 16421.0938] +25-08-31 06:17:34 | D | - best error = [16317.6028, 16317.6028, 16317.6028, 16284.0929, 16284.0929] +25-08-31 06:17:34 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:17:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:17:34 | D | - sum error = [16487.2547, 16446.4610, 16422.3584, 16649.5357, 16735.8126] +25-08-31 06:17:34 | D | - best error = [16284.0929, 16284.0929, 16284.0929, 16284.0929, 16284.0929] +25-08-31 06:17:34 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:17:34 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:17:34 | D | - sum error = [18023.4221, 17740.5569, 17436.1757, 17139.7624, 17275.5957] +25-08-31 06:17:34 | D | - best error = [16284.0929, 16284.0929, 16284.0929, 16284.0929, 16284.0929] +25-08-31 06:17:34 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:17:34 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:17:34 | D | - sum error = [16877.0282, 17034.7973, 16893.5756, 17081.0070, 16977.0254] +25-08-31 06:17:34 | D | - best error = [16284.0929, 16284.0929, 16284.0929, 16284.0929, 16284.0929] +25-08-31 06:17:34 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:17:34 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:17:34 | D | - sum error = [16925.1548, 16594.4849, 16578.8646, 16839.9430, 16658.7356] +25-08-31 06:17:34 | D | - best error = [16284.0929, 16284.0929, 16284.0929, 16284.0929, 16284.0929] +25-08-31 06:17:34 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:17:34 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:17:34 | D | - sum error = [16559.2522, 16608.3285, 16708.8432, 16889.8426] +25-08-31 06:17:34 | D | - best error = [16284.0929, 16284.0929, 16284.0929, 16284.0929] +25-08-31 06:17:34 | D | + error = 16284.0929 +25-08-31 06:17:34 | D | + scale = [min=0.3841, max=8.1033] +25-08-31 06:17:34 | D | - single_transformer_blocks.2.attn.out_proj +25-08-31 06:17:34 | D | + w: sfp4_e2m1_all +25-08-31 06:17:34 | D | + x: sfp4_e2m1_all +25-08-31 06:17:34 | D | + y: None +25-08-31 06:17:34 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:17:34 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:17:35 | D | + x - AbsMax +25-08-31 06:17:35 | D | + x = [min=2.4375, max=14.9375] +25-08-31 06:17:35 | D | + w - AbsMax +25-08-31 06:17:35 | D | + w = [min=0.1182, max=0.3105] +25-08-31 06:17:35 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:17:36 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:18:34 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:18:34 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:18:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:18:34 | D | - sum error = [ 6999.6494, 6990.8340, 6978.0413, 6967.0849, 6970.3440] +25-08-31 06:18:34 | D | - best error = [ 6999.6494, 6990.8340, 6978.0413, 6967.0849, 6967.0849] +25-08-31 06:18:34 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:18:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:18:34 | D | - sum error = [ 6983.4645, 6981.2899, 6990.1385, 6970.1331, 6983.3491] +25-08-31 06:18:34 | D | - best error = [ 6967.0849, 6967.0849, 6967.0849, 6967.0849, 6967.0849] +25-08-31 06:18:34 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:18:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:18:34 | D | - sum error = [ 6994.4873, 7002.9260, 7006.6297, 7027.7709, 7056.3109] +25-08-31 06:18:34 | D | - best error = [ 6967.0849, 6967.0849, 6967.0849, 6967.0849, 6967.0849] +25-08-31 06:18:34 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:18:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:18:34 | D | - sum error = [ 7053.4315, 7068.4980, 7087.0431, 7090.6767, 7107.5672] +25-08-31 06:18:34 | D | - best error = [ 6967.0849, 6967.0849, 6967.0849, 6967.0849, 6967.0849] +25-08-31 06:18:34 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:18:34 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:18:34 | D | - sum error = [ 7043.8191, 7033.1728, 7004.6736, 7012.5510, 7001.0265] +25-08-31 06:18:34 | D | - best error = [ 6967.0849, 6967.0849, 6967.0849, 6967.0849, 6967.0849] +25-08-31 06:18:34 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:18:34 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:18:34 | D | - sum error = [ 7010.9817, 7002.4007, 6998.3844, 6994.5896, 7020.2650] +25-08-31 06:18:34 | D | - best error = [ 6967.0849, 6967.0849, 6967.0849, 6967.0849, 6967.0849] +25-08-31 06:18:34 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:18:34 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:18:34 | D | - sum error = [ 7008.8376, 7016.7195, 7031.2329, 7038.3504, 7041.5103] +25-08-31 06:18:34 | D | - best error = [ 6967.0849, 6967.0849, 6967.0849, 6967.0849, 6967.0849] +25-08-31 06:18:34 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:18:34 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:18:34 | D | - sum error = [ 7059.1441, 7070.1949, 7093.1533, 7114.0312] +25-08-31 06:18:34 | D | - best error = [ 6967.0849, 6967.0849, 6967.0849, 6967.0849] +25-08-31 06:18:34 | D | + error = 6967.0849 +25-08-31 06:18:34 | D | + scale = [min=1.1430, max=1.5002] +25-08-31 06:18:35 | D | - single_transformer_blocks.2.down_proj +25-08-31 06:18:35 | D | + w: sfp4_e2m1_all +25-08-31 06:18:35 | D | + x: sfp4_e2m1_all +25-08-31 06:18:35 | D | + y: None +25-08-31 06:18:35 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:18:35 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:18:38 | D | + x - AbsMax +25-08-31 06:18:38 | D | + x = [min=2.0156, max=18.6250] +25-08-31 06:18:38 | D | + w - AbsMax +25-08-31 06:18:38 | D | + w = [min=0.1147, max=1.4609] +25-08-31 06:18:38 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:18:40 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:20:41 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:20:41 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:20:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:20:41 | D | - sum error = [ 6847.5774, 6851.5911, 6867.2536, 6856.8669, 6847.9182] +25-08-31 06:20:41 | D | - best error = [ 6847.5774, 6847.5774, 6847.5774, 6847.5774, 6847.5774] +25-08-31 06:20:41 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:20:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:20:41 | D | - sum error = [ 6858.7156, 6904.4533, 6887.5145, 6897.8298, 6925.0883] +25-08-31 06:20:41 | D | - best error = [ 6847.5774, 6847.5774, 6847.5774, 6847.5774, 6847.5774] +25-08-31 06:20:41 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:20:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:20:41 | D | - sum error = [ 6929.4245, 6951.0537, 6972.3921, 7005.2018, 7037.2887] +25-08-31 06:20:41 | D | - best error = [ 6847.5774, 6847.5774, 6847.5774, 6847.5774, 6847.5774] +25-08-31 06:20:41 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:20:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:20:41 | D | - sum error = [ 7069.5250, 7135.6275, 7173.4589, 7221.3243, 7269.2412] +25-08-31 06:20:41 | D | - best error = [ 6847.5774, 6847.5774, 6847.5774, 6847.5774, 6847.5774] +25-08-31 06:20:41 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:20:41 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:20:41 | D | - sum error = [ 7138.5873, 7086.1408, 7037.5945, 7004.9520, 6971.6150] +25-08-31 06:20:41 | D | - best error = [ 6847.5774, 6847.5774, 6847.5774, 6847.5774, 6847.5774] +25-08-31 06:20:41 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:20:41 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:20:41 | D | - sum error = [ 6947.2242, 6926.4034, 6910.8037, 6903.3244, 6893.0124] +25-08-31 06:20:41 | D | - best error = [ 6847.5774, 6847.5774, 6847.5774, 6847.5774, 6847.5774] +25-08-31 06:20:41 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:20:41 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:20:41 | D | - sum error = [ 6878.2960, 6896.4481, 6925.7415, 6956.5796, 7005.0008] +25-08-31 06:20:41 | D | - best error = [ 6847.5774, 6847.5774, 6847.5774, 6847.5774, 6847.5774] +25-08-31 06:20:41 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:20:41 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:20:41 | D | - sum error = [ 7038.8255, 7107.7493, 7176.5802, 7245.4007] +25-08-31 06:20:41 | D | - best error = [ 6847.5774, 6847.5774, 6847.5774, 6847.5774] +25-08-31 06:20:41 | D | + error = 6847.5774 +25-08-31 06:20:41 | D | + scale = [min=1.0000, max=1.0000] +25-08-31 06:21:00 | D | - Smoothing Diffusion Block single_transformer_blocks.3 +25-08-31 06:21:00 | D | - Skipping Module single_transformer_blocks.3.norm.linear +25-08-31 06:21:00 | D | - Smoothing Transformer Block single_transformer_blocks.3 +25-08-31 06:21:00 | D | - single_transformer_blocks.3.attn.qkv_proj + single_transformer_blocks.3.up_proj +25-08-31 06:21:00 | D | + w: sfp4_e2m1_all +25-08-31 06:21:00 | D | + x: sfp4_e2m1_all +25-08-31 06:21:00 | D | + y: None +25-08-31 06:21:00 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:21:00 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:21:00 | D | + x - AbsMax +25-08-31 06:21:00 | D | + x = [min=0.1768, max=31.6250] +25-08-31 06:21:00 | D | + w - AbsMax +25-08-31 06:21:00 | D | + w = [min=0.0913, max=0.5859] +25-08-31 06:21:00 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:21:01 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:23:08 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:23:08 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:23:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:23:08 | D | - sum error = [18080.9849, 17982.4875, 17736.0117, 17643.3974, 17612.5364] +25-08-31 06:23:08 | D | - best error = [18080.9849, 17982.4875, 17736.0117, 17643.3974, 17612.5364] +25-08-31 06:23:08 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:23:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:23:08 | D | - sum error = [17582.3311, 17834.1721, 17542.2088, 17339.2540, 17251.8604] +25-08-31 06:23:08 | D | - best error = [17582.3311, 17582.3311, 17542.2088, 17339.2540, 17251.8604] +25-08-31 06:23:08 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:23:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:23:08 | D | - sum error = [17393.8306, 17809.2812, 17764.8282, 17538.7872, 17575.7788] +25-08-31 06:23:08 | D | - best error = [17251.8604, 17251.8604, 17251.8604, 17251.8604, 17251.8604] +25-08-31 06:23:08 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:23:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:23:08 | D | - sum error = [17808.8374, 17831.5756, 17651.8328, 17807.2111, 17898.4397] +25-08-31 06:23:08 | D | - best error = [17251.8604, 17251.8604, 17251.8604, 17251.8604, 17251.8604] +25-08-31 06:23:08 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:23:08 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:23:08 | D | - sum error = [19555.6281, 19098.5269, 19073.3397, 18595.6244, 18241.4885] +25-08-31 06:23:08 | D | - best error = [17251.8604, 17251.8604, 17251.8604, 17251.8604, 17251.8604] +25-08-31 06:23:08 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:23:08 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:23:08 | D | - sum error = [17981.6867, 18529.9507, 17907.2651, 17937.0321, 17681.0316] +25-08-31 06:23:08 | D | - best error = [17251.8604, 17251.8604, 17251.8604, 17251.8604, 17251.8604] +25-08-31 06:23:08 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:23:08 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:23:08 | D | - sum error = [17736.6766, 17500.0709, 17681.4171, 17694.2775, 17758.7360] +25-08-31 06:23:08 | D | - best error = [17251.8604, 17251.8604, 17251.8604, 17251.8604, 17251.8604] +25-08-31 06:23:08 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:23:08 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:23:08 | D | - sum error = [17649.3345, 17937.6547, 18110.8365, 17673.5916] +25-08-31 06:23:08 | D | - best error = [17251.8604, 17251.8604, 17251.8604, 17251.8604] +25-08-31 06:23:08 | D | + error = 17251.8604 +25-08-31 06:23:08 | D | + scale = [min=0.4585, max=4.7317] +25-08-31 06:23:09 | D | - single_transformer_blocks.3.attn.out_proj +25-08-31 06:23:09 | D | + w: sfp4_e2m1_all +25-08-31 06:23:09 | D | + x: sfp4_e2m1_all +25-08-31 06:23:09 | D | + y: None +25-08-31 06:23:09 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:23:09 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:23:09 | D | + x - AbsMax +25-08-31 06:23:09 | D | + x = [min=2.0938, max=14.8750] +25-08-31 06:23:09 | D | + w - AbsMax +25-08-31 06:23:09 | D | + w = [min=0.1177, max=0.2949] +25-08-31 06:23:09 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:23:10 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:24:10 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:24:10 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:24:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:24:10 | D | - sum error = [ 6492.1436, 6492.0536, 6499.3203, 6491.8666, 6486.9108] +25-08-31 06:24:10 | D | - best error = [ 6492.1436, 6492.0536, 6492.0536, 6491.8666, 6486.9108] +25-08-31 06:24:10 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:24:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:24:10 | D | - sum error = [ 6483.7289, 6489.5649, 6493.2537, 6504.0852, 6508.6856] +25-08-31 06:24:10 | D | - best error = [ 6483.7289, 6483.7289, 6483.7289, 6483.7289, 6483.7289] +25-08-31 06:24:10 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:24:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:24:10 | D | - sum error = [ 6511.6446, 6511.8922, 6534.9118, 6540.9431, 6555.0307] +25-08-31 06:24:10 | D | - best error = [ 6483.7289, 6483.7289, 6483.7289, 6483.7289, 6483.7289] +25-08-31 06:24:10 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:24:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:24:10 | D | - sum error = [ 6565.9814, 6587.8194, 6602.6737, 6612.6657, 6641.8847] +25-08-31 06:24:10 | D | - best error = [ 6483.7289, 6483.7289, 6483.7289, 6483.7289, 6483.7289] +25-08-31 06:24:10 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:24:10 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:24:10 | D | - sum error = [ 6516.2640, 6524.0776, 6523.7719, 6516.8940, 6510.4839] +25-08-31 06:24:10 | D | - best error = [ 6483.7289, 6483.7289, 6483.7289, 6483.7289, 6483.7289] +25-08-31 06:24:10 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:24:10 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:24:10 | D | - sum error = [ 6522.3204, 6515.3106, 6510.0227, 6510.6037, 6510.9962] +25-08-31 06:24:10 | D | - best error = [ 6483.7289, 6483.7289, 6483.7289, 6483.7289, 6483.7289] +25-08-31 06:24:10 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:24:10 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:24:10 | D | - sum error = [ 6522.0635, 6538.9544, 6554.4806, 6563.2983, 6570.5117] +25-08-31 06:24:10 | D | - best error = [ 6483.7289, 6483.7289, 6483.7289, 6483.7289, 6483.7289] +25-08-31 06:24:10 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:24:10 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:24:10 | D | - sum error = [ 6598.9539, 6609.1476, 6622.4195, 6640.2117] +25-08-31 06:24:10 | D | - best error = [ 6483.7289, 6483.7289, 6483.7289, 6483.7289] +25-08-31 06:24:10 | D | + error = 6483.7289 +25-08-31 06:24:10 | D | + scale = [min=1.2029, max=1.9639] +25-08-31 06:24:10 | D | - single_transformer_blocks.3.down_proj +25-08-31 06:24:10 | D | + w: sfp4_e2m1_all +25-08-31 06:24:10 | D | + x: sfp4_e2m1_all +25-08-31 06:24:10 | D | + y: None +25-08-31 06:24:10 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:24:10 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:24:13 | D | + x - AbsMax +25-08-31 06:24:13 | D | + x = [min=1.6797, max=19.2500] +25-08-31 06:24:13 | D | + w - AbsMax +25-08-31 06:24:13 | D | + w = [min=0.1152, max=0.9180] +25-08-31 06:24:13 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:24:15 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:26:10 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:26:10 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:26:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:26:10 | D | - sum error = [ 6526.8721, 6535.2723, 6531.5282, 6528.4485, 6538.7051] +25-08-31 06:26:10 | D | - best error = [ 6526.8721, 6526.8721, 6526.8721, 6526.8721, 6526.8721] +25-08-31 06:26:10 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:26:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:26:10 | D | - sum error = [ 6537.9948, 6552.6856, 6549.6284, 6570.7001, 6593.9363] +25-08-31 06:26:10 | D | - best error = [ 6526.8721, 6526.8721, 6526.8721, 6526.8721, 6526.8721] +25-08-31 06:26:10 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:26:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:26:10 | D | - sum error = [ 6609.2375, 6634.7316, 6678.3962, 6701.3652, 6742.1581] +25-08-31 06:26:10 | D | - best error = [ 6526.8721, 6526.8721, 6526.8721, 6526.8721, 6526.8721] +25-08-31 06:26:10 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:26:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:26:10 | D | - sum error = [ 6779.3534, 6820.8686, 6872.1700, 6965.9742, 6972.7771] +25-08-31 06:26:10 | D | - best error = [ 6526.8721, 6526.8721, 6526.8721, 6526.8721, 6526.8721] +25-08-31 06:26:10 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:26:10 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:26:10 | D | - sum error = [ 6768.5981, 6715.3141, 6682.4876, 6666.4440, 6637.0536] +25-08-31 06:26:10 | D | - best error = [ 6526.8721, 6526.8721, 6526.8721, 6526.8721, 6526.8721] +25-08-31 06:26:10 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:26:10 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:26:10 | D | - sum error = [ 6617.2073, 6590.8091, 6568.1087, 6562.4114, 6562.7342] +25-08-31 06:26:10 | D | - best error = [ 6526.8721, 6526.8721, 6526.8721, 6526.8721, 6526.8721] +25-08-31 06:26:10 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:26:10 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:26:10 | D | - sum error = [ 6559.4417, 6575.8431, 6599.3900, 6626.5599, 6681.0852] +25-08-31 06:26:10 | D | - best error = [ 6526.8721, 6526.8721, 6526.8721, 6526.8721, 6526.8721] +25-08-31 06:26:10 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:26:10 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:26:10 | D | - sum error = [ 6736.6766, 6802.3090, 6870.6302, 6957.3049] +25-08-31 06:26:10 | D | - best error = [ 6526.8721, 6526.8721, 6526.8721, 6526.8721] +25-08-31 06:26:10 | D | + error = 6526.8721 +25-08-31 06:26:10 | D | + scale = [min=1.0000, max=1.0000] +25-08-31 06:26:29 | D | - Smoothing Diffusion Block single_transformer_blocks.4 +25-08-31 06:26:29 | D | - Skipping Module single_transformer_blocks.4.norm.linear +25-08-31 06:26:29 | D | - Smoothing Transformer Block single_transformer_blocks.4 +25-08-31 06:26:29 | D | - single_transformer_blocks.4.attn.qkv_proj + single_transformer_blocks.4.up_proj +25-08-31 06:26:29 | D | + w: sfp4_e2m1_all +25-08-31 06:26:29 | D | + x: sfp4_e2m1_all +25-08-31 06:26:29 | D | + y: None +25-08-31 06:26:29 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:26:29 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:26:29 | D | + x - AbsMax +25-08-31 06:26:29 | D | + x = [min=0.1226, max=28.7500] +25-08-31 06:26:29 | D | + w - AbsMax +25-08-31 06:26:29 | D | + w = [min=0.0850, max=0.6367] +25-08-31 06:26:29 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:26:31 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:28:38 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:28:38 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:28:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:28:38 | D | - sum error = [19578.5760, 18954.6355, 18827.3758, 18735.6089, 18782.2325] +25-08-31 06:28:38 | D | - best error = [19578.5760, 18954.6355, 18827.3758, 18735.6089, 18735.6089] +25-08-31 06:28:38 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:28:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:28:38 | D | - sum error = [18858.8263, 18774.5845, 19280.2398, 18961.1350, 18912.5457] +25-08-31 06:28:38 | D | - best error = [18735.6089, 18735.6089, 18735.6089, 18735.6089, 18735.6089] +25-08-31 06:28:38 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:28:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:28:38 | D | - sum error = [18899.1606, 18681.5134, 19264.1728, 20491.0501, 19449.4175] +25-08-31 06:28:38 | D | - best error = [18735.6089, 18681.5134, 18681.5134, 18681.5134, 18681.5134] +25-08-31 06:28:38 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:28:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:28:38 | D | - sum error = [19492.7654, 19172.1381, 18891.1111, 19077.5621, 19163.4143] +25-08-31 06:28:38 | D | - best error = [18681.5134, 18681.5134, 18681.5134, 18681.5134, 18681.5134] +25-08-31 06:28:38 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:28:38 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:28:38 | D | - sum error = [22020.7916, 20531.6676, 20119.3185, 19892.7967, 19811.0208] +25-08-31 06:28:39 | D | - best error = [18681.5134, 18681.5134, 18681.5134, 18681.5134, 18681.5134] +25-08-31 06:28:39 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:28:39 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:28:39 | D | - sum error = [19896.4560, 19622.1051, 19334.2920, 19545.2345, 19200.9533] +25-08-31 06:28:39 | D | - best error = [18681.5134, 18681.5134, 18681.5134, 18681.5134, 18681.5134] +25-08-31 06:28:39 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:28:39 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:28:39 | D | - sum error = [18911.2978, 19616.6745, 19954.7039, 19169.3664, 19116.4231] +25-08-31 06:28:39 | D | - best error = [18681.5134, 18681.5134, 18681.5134, 18681.5134, 18681.5134] +25-08-31 06:28:39 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:28:39 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:28:39 | D | - sum error = [19169.4140, 18996.8629, 19061.8169, 19078.0360] +25-08-31 06:28:39 | D | - best error = [18681.5134, 18681.5134, 18681.5134, 18681.5134] +25-08-31 06:28:39 | D | + error = 18681.5134 +25-08-31 06:28:39 | D | + scale = [min=0.3152, max=6.3424] +25-08-31 06:28:39 | D | - single_transformer_blocks.4.attn.out_proj +25-08-31 06:28:39 | D | + w: sfp4_e2m1_all +25-08-31 06:28:39 | D | + x: sfp4_e2m1_all +25-08-31 06:28:39 | D | + y: None +25-08-31 06:28:39 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:28:39 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:28:40 | D | + x - AbsMax +25-08-31 06:28:40 | D | + x = [min=1.7266, max=15.2500] +25-08-31 06:28:40 | D | + w - AbsMax +25-08-31 06:28:40 | D | + w = [min=0.1182, max=0.3574] +25-08-31 06:28:40 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:28:40 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:29:41 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:29:41 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:29:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:29:41 | D | - sum error = [ 6119.1313, 6126.8834, 6119.2892, 6126.3337, 6130.3321] +25-08-31 06:29:41 | D | - best error = [ 6119.1313, 6119.1313, 6119.1313, 6119.1313, 6119.1313] +25-08-31 06:29:41 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:29:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:29:41 | D | - sum error = [ 6132.4677, 6134.8669, 6127.3688, 6143.3460, 6156.9788] +25-08-31 06:29:41 | D | - best error = [ 6119.1313, 6119.1313, 6119.1313, 6119.1313, 6119.1313] +25-08-31 06:29:41 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:29:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:29:41 | D | - sum error = [ 6156.8161, 6167.4877, 6191.8291, 6192.2445, 6207.0109] +25-08-31 06:29:41 | D | - best error = [ 6119.1313, 6119.1313, 6119.1313, 6119.1313, 6119.1313] +25-08-31 06:29:41 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:29:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:29:41 | D | - sum error = [ 6217.1929, 6227.7229, 6245.0890, 6252.7933, 6280.5522] +25-08-31 06:29:41 | D | - best error = [ 6119.1313, 6119.1313, 6119.1313, 6119.1313, 6119.1313] +25-08-31 06:29:41 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:29:41 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:29:41 | D | - sum error = [ 6178.3590, 6165.4943, 6163.2141, 6163.3416, 6166.0444] +25-08-31 06:29:41 | D | - best error = [ 6119.1313, 6119.1313, 6119.1313, 6119.1313, 6119.1313] +25-08-31 06:29:41 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:29:41 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:29:41 | D | - sum error = [ 6172.7637, 6154.8693, 6167.2013, 6158.9989, 6193.7743] +25-08-31 06:29:41 | D | - best error = [ 6119.1313, 6119.1313, 6119.1313, 6119.1313, 6119.1313] +25-08-31 06:29:41 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:29:41 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:29:41 | D | - sum error = [ 6181.2416, 6196.6146, 6217.8411, 6196.1491, 6240.2724] +25-08-31 06:29:41 | D | - best error = [ 6119.1313, 6119.1313, 6119.1313, 6119.1313, 6119.1313] +25-08-31 06:29:41 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:29:41 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:29:41 | D | - sum error = [ 6241.9862, 6250.7753, 6268.9033, 6287.2493] +25-08-31 06:29:41 | D | - best error = [ 6119.1313, 6119.1313, 6119.1313, 6119.1313] +25-08-31 06:29:41 | D | + error = 6119.1313 +25-08-31 06:29:41 | D | + scale = [min=1.0000, max=1.0000] +25-08-31 06:29:41 | D | - single_transformer_blocks.4.down_proj +25-08-31 06:29:41 | D | + w: sfp4_e2m1_all +25-08-31 06:29:41 | D | + x: sfp4_e2m1_all +25-08-31 06:29:41 | D | + y: None +25-08-31 06:29:41 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:29:41 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:29:44 | D | + x - AbsMax +25-08-31 06:29:44 | D | + x = [min=1.6719, max=19.7500] +25-08-31 06:29:44 | D | + w - AbsMax +25-08-31 06:29:44 | D | + w = [min=0.1143, max=1.2422] +25-08-31 06:29:44 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:29:46 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:31:43 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:31:43 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:31:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:31:43 | D | - sum error = [ 6502.3956, 6510.6783, 6504.5916, 6496.1453, 6490.2169] +25-08-31 06:31:43 | D | - best error = [ 6502.3956, 6502.3956, 6502.3956, 6496.1453, 6490.2169] +25-08-31 06:31:43 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:31:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:31:43 | D | - sum error = [ 6484.9259, 6505.2656, 6524.4709, 6544.9229, 6549.4256] +25-08-31 06:31:43 | D | - best error = [ 6484.9259, 6484.9259, 6484.9259, 6484.9259, 6484.9259] +25-08-31 06:31:43 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:31:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:31:43 | D | - sum error = [ 6575.8549, 6610.0121, 6678.6409, 6672.1304, 6797.7470] +25-08-31 06:31:43 | D | - best error = [ 6484.9259, 6484.9259, 6484.9259, 6484.9259, 6484.9259] +25-08-31 06:31:43 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:31:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:31:43 | D | - sum error = [ 6733.7701, 6788.6656, 6836.3639, 6886.6291, 6933.4217] +25-08-31 06:31:43 | D | - best error = [ 6484.9259, 6484.9259, 6484.9259, 6484.9259, 6484.9259] +25-08-31 06:31:43 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:31:43 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:31:43 | D | - sum error = [ 6754.3133, 6697.0453, 6840.5181, 6641.3568, 6618.8585] +25-08-31 06:31:43 | D | - best error = [ 6484.9259, 6484.9259, 6484.9259, 6484.9259, 6484.9259] +25-08-31 06:31:43 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:31:43 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:31:43 | D | - sum error = [ 6592.6007, 6566.3689, 6539.8914, 6534.4949, 6536.9681] +25-08-31 06:31:43 | D | - best error = [ 6484.9259, 6484.9259, 6484.9259, 6484.9259, 6484.9259] +25-08-31 06:31:43 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:31:43 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:31:43 | D | - sum error = [ 6537.2462, 6550.1853, 6569.4755, 6605.5346, 6652.0707] +25-08-31 06:31:43 | D | - best error = [ 6484.9259, 6484.9259, 6484.9259, 6484.9259, 6484.9259] +25-08-31 06:31:43 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:31:43 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:31:43 | D | - sum error = [ 6703.9547, 6755.1397, 6846.8708, 6921.7576] +25-08-31 06:31:43 | D | - best error = [ 6484.9259, 6484.9259, 6484.9259, 6484.9259] +25-08-31 06:31:43 | D | + error = 6484.9259 +25-08-31 06:31:43 | D | + scale = [min=1.1371, max=2.1081] +25-08-31 06:32:03 | D | - Smoothing Diffusion Block single_transformer_blocks.5 +25-08-31 06:32:03 | D | - Skipping Module single_transformer_blocks.5.norm.linear +25-08-31 06:32:03 | D | - Smoothing Transformer Block single_transformer_blocks.5 +25-08-31 06:32:03 | D | - single_transformer_blocks.5.attn.qkv_proj + single_transformer_blocks.5.up_proj +25-08-31 06:32:03 | D | + w: sfp4_e2m1_all +25-08-31 06:32:03 | D | + x: sfp4_e2m1_all +25-08-31 06:32:03 | D | + y: None +25-08-31 06:32:03 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:32:03 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:32:03 | D | + x - AbsMax +25-08-31 06:32:03 | D | + x = [min=0.1709, max=26.3750] +25-08-31 06:32:03 | D | + w - AbsMax +25-08-31 06:32:03 | D | + w = [min=0.0820, max=0.5352] +25-08-31 06:32:03 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:32:04 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:34:12 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:34:12 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:34:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:34:12 | D | - sum error = [20680.3497, 20608.3354, 20581.1557, 20696.1693, 20350.5518] +25-08-31 06:34:12 | D | - best error = [20680.3497, 20608.3354, 20581.1557, 20581.1557, 20350.5518] +25-08-31 06:34:12 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:34:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:34:12 | D | - sum error = [20835.8215, 20287.7524, 20543.5919, 21059.6501, 20457.7895] +25-08-31 06:34:12 | D | - best error = [20350.5518, 20287.7524, 20287.7524, 20287.7524, 20287.7524] +25-08-31 06:34:12 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:34:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:34:12 | D | - sum error = [20021.1712, 20759.0327, 20045.8522, 20082.4652, 20169.0472] +25-08-31 06:34:12 | D | - best error = [20021.1712, 20021.1712, 20021.1712, 20021.1712, 20021.1712] +25-08-31 06:34:12 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:34:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:34:12 | D | - sum error = [20244.4825, 20717.0338, 20498.0741, 20543.7469, 20863.4034] +25-08-31 06:34:12 | D | - best error = [20021.1712, 20021.1712, 20021.1712, 20021.1712, 20021.1712] +25-08-31 06:34:12 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:34:12 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:34:12 | D | - sum error = [21849.2047, 21526.9491, 21288.5261, 21182.8172, 21372.7999] +25-08-31 06:34:12 | D | - best error = [20021.1712, 20021.1712, 20021.1712, 20021.1712, 20021.1712] +25-08-31 06:34:12 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:34:12 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:34:12 | D | - sum error = [20845.3805, 20728.8792, 20804.5864, 20900.0991, 20661.0250] +25-08-31 06:34:12 | D | - best error = [20021.1712, 20021.1712, 20021.1712, 20021.1712, 20021.1712] +25-08-31 06:34:12 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:34:12 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:34:12 | D | - sum error = [20771.4928, 20374.9666, 20276.3295, 20248.7183, 20224.3734] +25-08-31 06:34:12 | D | - best error = [20021.1712, 20021.1712, 20021.1712, 20021.1712, 20021.1712] +25-08-31 06:34:12 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:34:12 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:34:12 | D | - sum error = [20433.9827, 20533.1560, 20859.1298, 20533.0828] +25-08-31 06:34:12 | D | - best error = [20021.1712, 20021.1712, 20021.1712, 20021.1712] +25-08-31 06:34:12 | D | + error = 20021.1712 +25-08-31 06:34:12 | D | + scale = [min=0.4134, max=5.1357] +25-08-31 06:34:12 | D | - single_transformer_blocks.5.attn.out_proj +25-08-31 06:34:12 | D | + w: sfp4_e2m1_all +25-08-31 06:34:12 | D | + x: sfp4_e2m1_all +25-08-31 06:34:12 | D | + y: None +25-08-31 06:34:12 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:34:12 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:34:13 | D | + x - AbsMax +25-08-31 06:34:13 | D | + x = [min=1.8047, max=14.3125] +25-08-31 06:34:13 | D | + w - AbsMax +25-08-31 06:34:13 | D | + w = [min=0.1196, max=0.3359] +25-08-31 06:34:13 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:34:14 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:35:12 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:35:12 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:35:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:35:12 | D | - sum error = [ 5646.2766, 5646.5981, 5650.4356, 5647.3339, 5637.9996] +25-08-31 06:35:12 | D | - best error = [ 5646.2766, 5646.2766, 5646.2766, 5646.2766, 5637.9996] +25-08-31 06:35:12 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:35:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:35:12 | D | - sum error = [ 5638.5846, 5652.6327, 5652.7167, 5653.4324, 5657.6175] +25-08-31 06:35:12 | D | - best error = [ 5637.9996, 5637.9996, 5637.9996, 5637.9996, 5637.9996] +25-08-31 06:35:12 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:35:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:35:12 | D | - sum error = [ 5666.1010, 5678.3522, 5685.9427, 5704.0531, 5719.3601] +25-08-31 06:35:12 | D | - best error = [ 5637.9996, 5637.9996, 5637.9996, 5637.9996, 5637.9996] +25-08-31 06:35:12 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:35:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:35:12 | D | - sum error = [ 5737.0947, 5738.9509, 5759.2453, 5780.5841, 5799.9190] +25-08-31 06:35:12 | D | - best error = [ 5637.9996, 5637.9996, 5637.9996, 5637.9996, 5637.9996] +25-08-31 06:35:12 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:35:12 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:35:12 | D | - sum error = [ 5694.2013, 5693.8117, 5692.2980, 5680.9675, 5672.8305] +25-08-31 06:35:12 | D | - best error = [ 5637.9996, 5637.9996, 5637.9996, 5637.9996, 5637.9996] +25-08-31 06:35:12 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:35:12 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:35:12 | D | - sum error = [ 5678.6204, 5676.5549, 5663.2888, 5685.7438, 5680.7568] +25-08-31 06:35:12 | D | - best error = [ 5637.9996, 5637.9996, 5637.9996, 5637.9996, 5637.9996] +25-08-31 06:35:12 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:35:12 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:35:12 | D | - sum error = [ 5695.5788, 5697.0684, 5706.1924, 5722.8206, 5740.2530] +25-08-31 06:35:12 | D | - best error = [ 5637.9996, 5637.9996, 5637.9996, 5637.9996, 5637.9996] +25-08-31 06:35:12 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:35:12 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:35:12 | D | - sum error = [ 5740.5512, 5762.8236, 5778.4278, 5799.7214] +25-08-31 06:35:12 | D | - best error = [ 5637.9996, 5637.9996, 5637.9996, 5637.9996] +25-08-31 06:35:12 | D | + error = 5637.9996 +25-08-31 06:35:12 | D | + scale = [min=1.1253, max=1.7027] +25-08-31 06:35:13 | D | - single_transformer_blocks.5.down_proj +25-08-31 06:35:13 | D | + w: sfp4_e2m1_all +25-08-31 06:35:13 | D | + x: sfp4_e2m1_all +25-08-31 06:35:13 | D | + y: None +25-08-31 06:35:13 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:35:13 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:35:15 | D | + x - AbsMax +25-08-31 06:35:15 | D | + x = [min=1.7109, max=20.6250] +25-08-31 06:35:15 | D | + w - AbsMax +25-08-31 06:35:15 | D | + w = [min=0.1133, max=0.7773] +25-08-31 06:35:15 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:35:18 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:37:18 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:37:18 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:37:18 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:37:18 | D | - sum error = [ 6286.4998, 6305.0621, 6304.8569, 6289.0670, 6283.5557] +25-08-31 06:37:18 | D | - best error = [ 6286.4998, 6286.4998, 6286.4998, 6286.4998, 6283.5557] +25-08-31 06:37:18 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:37:18 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:37:18 | D | - sum error = [ 6341.9861, 6312.7563, 6325.0464, 6336.6161, 6347.9184] +25-08-31 06:37:18 | D | - best error = [ 6283.5557, 6283.5557, 6283.5557, 6283.5557, 6283.5557] +25-08-31 06:37:18 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:37:18 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:37:18 | D | - sum error = [ 6387.8791, 6417.6731, 6434.4327, 6474.4816, 6529.3096] +25-08-31 06:37:18 | D | - best error = [ 6283.5557, 6283.5557, 6283.5557, 6283.5557, 6283.5557] +25-08-31 06:37:18 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:37:18 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:37:18 | D | - sum error = [ 6614.2068, 6610.5609, 6661.5425, 6709.6712, 6762.7610] +25-08-31 06:37:18 | D | - best error = [ 6283.5557, 6283.5557, 6283.5557, 6283.5557, 6283.5557] +25-08-31 06:37:18 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:37:18 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:37:18 | D | - sum error = [ 6552.0629, 6516.7774, 6476.4126, 6445.1054, 6427.6425] +25-08-31 06:37:18 | D | - best error = [ 6283.5557, 6283.5557, 6283.5557, 6283.5557, 6283.5557] +25-08-31 06:37:18 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:37:18 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:37:18 | D | - sum error = [ 6392.6455, 6377.8642, 6356.4506, 6331.9975, 6372.5596] +25-08-31 06:37:18 | D | - best error = [ 6283.5557, 6283.5557, 6283.5557, 6283.5557, 6283.5557] +25-08-31 06:37:18 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:37:18 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:37:18 | D | - sum error = [ 6345.2675, 6358.4246, 6389.1176, 6476.1634, 6469.4451] +25-08-31 06:37:18 | D | - best error = [ 6283.5557, 6283.5557, 6283.5557, 6283.5557, 6283.5557] +25-08-31 06:37:18 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:37:18 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:37:18 | D | - sum error = [ 6521.7096, 6601.4271, 6663.6251, 6740.5089] +25-08-31 06:37:18 | D | - best error = [ 6283.5557, 6283.5557, 6283.5557, 6283.5557] +25-08-31 06:37:18 | D | + error = 6283.5557 +25-08-31 06:37:18 | D | + scale = [min=1.1134, max=1.8318] +25-08-31 06:37:38 | D | - Smoothing Diffusion Block single_transformer_blocks.6 +25-08-31 06:37:38 | D | - Skipping Module single_transformer_blocks.6.norm.linear +25-08-31 06:37:38 | D | - Smoothing Transformer Block single_transformer_blocks.6 +25-08-31 06:37:38 | D | - single_transformer_blocks.6.attn.qkv_proj + single_transformer_blocks.6.up_proj +25-08-31 06:37:38 | D | + w: sfp4_e2m1_all +25-08-31 06:37:38 | D | + x: sfp4_e2m1_all +25-08-31 06:37:38 | D | + y: None +25-08-31 06:37:38 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:37:38 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:37:38 | D | + x - AbsMax +25-08-31 06:37:38 | D | + x = [min=0.1621, max=23.1250] +25-08-31 06:37:38 | D | + w - AbsMax +25-08-31 06:37:38 | D | + w = [min=0.0869, max=0.6016] +25-08-31 06:37:38 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:37:39 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:39:49 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:39:49 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:39:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:39:49 | D | - sum error = [25222.5220, 24984.8133, 24682.6164, 24406.3756, 24077.6132] +25-08-31 06:39:49 | D | - best error = [25222.5220, 24984.8133, 24682.6164, 24406.3756, 24077.6132] +25-08-31 06:39:49 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:39:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:39:49 | D | - sum error = [23764.0428, 23695.1087, 23881.1135, 23445.7222, 23335.6654] +25-08-31 06:39:49 | D | - best error = [23764.0428, 23695.1087, 23695.1087, 23445.7222, 23335.6654] +25-08-31 06:39:49 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:39:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:39:49 | D | - sum error = [23306.7585, 23477.5290, 23647.1150, 23563.5284, 23608.9027] +25-08-31 06:39:49 | D | - best error = [23306.7585, 23306.7585, 23306.7585, 23306.7585, 23306.7585] +25-08-31 06:39:49 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:39:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:39:49 | D | - sum error = [23407.4627, 23626.3392, 23535.5377, 23758.6047, 23778.6257] +25-08-31 06:39:49 | D | - best error = [23306.7585, 23306.7585, 23306.7585, 23306.7585, 23306.7585] +25-08-31 06:39:49 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:39:49 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:39:49 | D | - sum error = [27499.1968, 27428.6169, 26719.1485, 26075.2879, 25817.3362] +25-08-31 06:39:49 | D | - best error = [23306.7585, 23306.7585, 23306.7585, 23306.7585, 23306.7585] +25-08-31 06:39:49 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:39:49 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:39:49 | D | - sum error = [25350.4979, 25091.3110, 25020.5496, 24292.0366, 24541.5918] +25-08-31 06:39:49 | D | - best error = [23306.7585, 23306.7585, 23306.7585, 23306.7585, 23306.7585] +25-08-31 06:39:49 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:39:49 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:39:49 | D | - sum error = [24281.3759, 23740.2356, 23688.1824, 23590.0128, 23641.8097] +25-08-31 06:39:49 | D | - best error = [23306.7585, 23306.7585, 23306.7585, 23306.7585, 23306.7585] +25-08-31 06:39:49 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:39:49 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:39:49 | D | - sum error = [23675.9186, 23955.0112, 23733.3103, 23934.5427] +25-08-31 06:39:49 | D | - best error = [23306.7585, 23306.7585, 23306.7585, 23306.7585] +25-08-31 06:39:49 | D | + error = 23306.7585 +25-08-31 06:39:49 | D | + scale = [min=0.4026, max=4.8088] +25-08-31 06:39:49 | D | - single_transformer_blocks.6.attn.out_proj +25-08-31 06:39:49 | D | + w: sfp4_e2m1_all +25-08-31 06:39:49 | D | + x: sfp4_e2m1_all +25-08-31 06:39:49 | D | + y: None +25-08-31 06:39:49 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:39:49 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:39:50 | D | + x - AbsMax +25-08-31 06:39:50 | D | + x = [min=1.9297, max=13.4375] +25-08-31 06:39:50 | D | + w - AbsMax +25-08-31 06:39:50 | D | + w = [min=0.1167, max=0.2969] +25-08-31 06:39:50 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:39:51 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:40:52 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:40:52 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:40:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:40:52 | D | - sum error = [ 5910.0437, 5908.0335, 5907.3344, 5918.1555, 5921.3853] +25-08-31 06:40:52 | D | - best error = [ 5910.0437, 5908.0335, 5907.3344, 5907.3344, 5907.3344] +25-08-31 06:40:52 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:40:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:40:52 | D | - sum error = [ 5904.5660, 5916.0392, 5908.5910, 5905.6586, 5927.9318] +25-08-31 06:40:52 | D | - best error = [ 5904.5660, 5904.5660, 5904.5660, 5904.5660, 5904.5660] +25-08-31 06:40:52 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:40:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:40:52 | D | - sum error = [ 5915.8164, 5922.1344, 5942.1555, 5955.8774, 5970.6003] +25-08-31 06:40:52 | D | - best error = [ 5904.5660, 5904.5660, 5904.5660, 5904.5660, 5904.5660] +25-08-31 06:40:52 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:40:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:40:52 | D | - sum error = [ 5973.7600, 5995.2778, 5994.0017, 6020.3571, 6041.9245] +25-08-31 06:40:52 | D | - best error = [ 5904.5660, 5904.5660, 5904.5660, 5904.5660, 5904.5660] +25-08-31 06:40:52 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:40:52 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:40:52 | D | - sum error = [ 5947.8500, 5945.7992, 5937.5129, 5923.2126, 5925.1089] +25-08-31 06:40:52 | D | - best error = [ 5904.5660, 5904.5660, 5904.5660, 5904.5660, 5904.5660] +25-08-31 06:40:52 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:40:52 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:40:52 | D | - sum error = [ 5927.8755, 5931.9815, 5926.6537, 5935.1246, 5928.2179] +25-08-31 06:40:52 | D | - best error = [ 5904.5660, 5904.5660, 5904.5660, 5904.5660, 5904.5660] +25-08-31 06:40:52 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:40:52 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:40:52 | D | - sum error = [ 5936.8173, 5941.6796, 5948.1638, 5968.0375, 5979.1518] +25-08-31 06:40:52 | D | - best error = [ 5904.5660, 5904.5660, 5904.5660, 5904.5660, 5904.5660] +25-08-31 06:40:52 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:40:52 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:40:52 | D | - sum error = [ 5996.4006, 6010.5847, 6017.3290, 6035.4473] +25-08-31 06:40:52 | D | - best error = [ 5904.5660, 5904.5660, 5904.5660, 5904.5660] +25-08-31 06:40:52 | D | + error = 5904.5660 +25-08-31 06:40:52 | D | + scale = [min=1.1786, max=1.9146] +25-08-31 06:40:52 | D | - single_transformer_blocks.6.down_proj +25-08-31 06:40:52 | D | + w: sfp4_e2m1_all +25-08-31 06:40:52 | D | + x: sfp4_e2m1_all +25-08-31 06:40:52 | D | + y: None +25-08-31 06:40:52 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:40:52 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:40:55 | D | + x - AbsMax +25-08-31 06:40:55 | D | + x = [min=0.8594, max=18.0000] +25-08-31 06:40:55 | D | + w - AbsMax +25-08-31 06:40:55 | D | + w = [min=0.1177, max=1.2578] +25-08-31 06:40:55 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:40:57 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:43:02 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:43:02 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:43:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:43:02 | D | - sum error = [ 6416.7867, 6429.8280, 6427.5631, 6420.9738, 6428.7815] +25-08-31 06:43:02 | D | - best error = [ 6416.7867, 6416.7867, 6416.7867, 6416.7867, 6416.7867] +25-08-31 06:43:02 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:43:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:43:02 | D | - sum error = [ 6425.7619, 6451.2634, 6461.3246, 6479.3960, 6504.1916] +25-08-31 06:43:02 | D | - best error = [ 6416.7867, 6416.7867, 6416.7867, 6416.7867, 6416.7867] +25-08-31 06:43:02 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:43:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:43:02 | D | - sum error = [ 6518.2943, 6540.2749, 6570.8709, 6603.2874, 6643.2998] +25-08-31 06:43:02 | D | - best error = [ 6416.7867, 6416.7867, 6416.7867, 6416.7867, 6416.7867] +25-08-31 06:43:02 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:43:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:43:02 | D | - sum error = [ 6687.6474, 6729.7138, 6771.7767, 6830.4726, 6879.8253] +25-08-31 06:43:02 | D | - best error = [ 6416.7867, 6416.7867, 6416.7867, 6416.7867, 6416.7867] +25-08-31 06:43:02 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:43:02 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:43:02 | D | - sum error = [ 6727.7096, 6679.9799, 6633.2405, 6598.4428, 6569.6868] +25-08-31 06:43:02 | D | - best error = [ 6416.7867, 6416.7867, 6416.7867, 6416.7867, 6416.7867] +25-08-31 06:43:02 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:43:02 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:43:02 | D | - sum error = [ 6543.4777, 6525.9844, 6504.7915, 6486.5621, 6485.2032] +25-08-31 06:43:02 | D | - best error = [ 6416.7867, 6416.7867, 6416.7867, 6416.7867, 6416.7867] +25-08-31 06:43:02 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:43:02 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:43:02 | D | - sum error = [ 6482.4267, 6503.7577, 6533.6060, 6564.6563, 6617.2036] +25-08-31 06:43:02 | D | - best error = [ 6416.7867, 6416.7867, 6416.7867, 6416.7867, 6416.7867] +25-08-31 06:43:02 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:43:02 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:43:02 | D | - sum error = [ 6659.4600, 6719.7187, 6782.9931, 6863.5559] +25-08-31 06:43:02 | D | - best error = [ 6416.7867, 6416.7867, 6416.7867, 6416.7867] +25-08-31 06:43:02 | D | + error = 6416.7867 +25-08-31 06:43:02 | D | + scale = [min=1.0000, max=1.0000] +25-08-31 06:43:20 | D | - Smoothing Diffusion Block single_transformer_blocks.7 +25-08-31 06:43:20 | D | - Skipping Module single_transformer_blocks.7.norm.linear +25-08-31 06:43:20 | D | - Smoothing Transformer Block single_transformer_blocks.7 +25-08-31 06:43:20 | D | - single_transformer_blocks.7.attn.qkv_proj + single_transformer_blocks.7.up_proj +25-08-31 06:43:20 | D | + w: sfp4_e2m1_all +25-08-31 06:43:20 | D | + x: sfp4_e2m1_all +25-08-31 06:43:20 | D | + y: None +25-08-31 06:43:20 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:43:20 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:43:20 | D | + x - AbsMax +25-08-31 06:43:20 | D | + x = [min=0.1465, max=22.3750] +25-08-31 06:43:20 | D | + w - AbsMax +25-08-31 06:43:20 | D | + w = [min=0.0942, max=0.5703] +25-08-31 06:43:20 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:43:22 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:45:28 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:45:28 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:45:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:45:28 | D | - sum error = [27005.0401, 26681.3097, 26485.8640, 25937.2819, 25656.6685] +25-08-31 06:45:28 | D | - best error = [27005.0401, 26681.3097, 26485.8640, 25937.2819, 25656.6685] +25-08-31 06:45:28 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:45:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:45:28 | D | - sum error = [25626.4289, 25499.2643, 25499.2844, 25101.1993, 25204.2028] +25-08-31 06:45:28 | D | - best error = [25626.4289, 25499.2643, 25499.2643, 25101.1993, 25101.1993] +25-08-31 06:45:28 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:45:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:45:28 | D | - sum error = [25061.0723, 25241.1752, 25326.5420, 25173.3510, 25246.6844] +25-08-31 06:45:28 | D | - best error = [25061.0723, 25061.0723, 25061.0723, 25061.0723, 25061.0723] +25-08-31 06:45:28 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:45:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:45:28 | D | - sum error = [25030.0312, 25129.9496, 25379.4424, 25322.0675, 25614.2235] +25-08-31 06:45:28 | D | - best error = [25030.0312, 25030.0312, 25030.0312, 25030.0312, 25030.0312] +25-08-31 06:45:28 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:45:28 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:45:28 | D | - sum error = [28839.6771, 29224.1960, 28280.3779, 27793.8111, 27016.5105] +25-08-31 06:45:28 | D | - best error = [25030.0312, 25030.0312, 25030.0312, 25030.0312, 25030.0312] +25-08-31 06:45:28 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:45:28 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:45:28 | D | - sum error = [26382.4196, 26154.8778, 26197.3654, 26230.7313, 26732.6174] +25-08-31 06:45:28 | D | - best error = [25030.0312, 25030.0312, 25030.0312, 25030.0312, 25030.0312] +25-08-31 06:45:28 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:45:28 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:45:28 | D | - sum error = [25535.6717, 25582.0525, 25516.2342, 25571.6193, 25257.7738] +25-08-31 06:45:28 | D | - best error = [25030.0312, 25030.0312, 25030.0312, 25030.0312, 25030.0312] +25-08-31 06:45:28 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:45:28 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:45:28 | D | - sum error = [25321.1463, 25616.2061, 25614.0349, 25789.1673] +25-08-31 06:45:28 | D | - best error = [25030.0312, 25030.0312, 25030.0312, 25030.0312] +25-08-31 06:45:28 | D | + error = 25030.0312 +25-08-31 06:45:28 | D | + scale = [min=0.2368, max=10.2878] +25-08-31 06:45:28 | D | - single_transformer_blocks.7.attn.out_proj +25-08-31 06:45:28 | D | + w: sfp4_e2m1_all +25-08-31 06:45:28 | D | + x: sfp4_e2m1_all +25-08-31 06:45:28 | D | + y: None +25-08-31 06:45:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:45:28 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:45:29 | D | + x - AbsMax +25-08-31 06:45:29 | D | + x = [min=1.6562, max=8.8125] +25-08-31 06:45:29 | D | + w - AbsMax +25-08-31 06:45:29 | D | + w = [min=0.1147, max=0.3379] +25-08-31 06:45:29 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:45:29 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:46:28 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:46:28 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:46:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:46:28 | D | - sum error = [ 6062.8403, 6066.8565, 6077.7723, 6065.5176, 6066.7098] +25-08-31 06:46:28 | D | - best error = [ 6062.8403, 6062.8403, 6062.8403, 6062.8403, 6062.8403] +25-08-31 06:46:28 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:46:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:46:28 | D | - sum error = [ 6054.7964, 6046.3255, 6043.6846, 6046.5769, 6043.1528] +25-08-31 06:46:28 | D | - best error = [ 6054.7964, 6046.3255, 6043.6846, 6043.6846, 6043.1528] +25-08-31 06:46:28 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:46:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:46:28 | D | - sum error = [ 6049.2099, 6052.0605, 6043.4487, 6050.1673, 6051.3671] +25-08-31 06:46:28 | D | - best error = [ 6043.1528, 6043.1528, 6043.1528, 6043.1528, 6043.1528] +25-08-31 06:46:28 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:46:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:46:28 | D | - sum error = [ 6055.8699, 6072.6251, 6074.0267, 6078.5604, 6101.3874] +25-08-31 06:46:28 | D | - best error = [ 6043.1528, 6043.1528, 6043.1528, 6043.1528, 6043.1528] +25-08-31 06:46:28 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:46:28 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:46:28 | D | - sum error = [ 6131.6781, 6116.3741, 6108.9744, 6098.5720, 6086.8948] +25-08-31 06:46:28 | D | - best error = [ 6043.1528, 6043.1528, 6043.1528, 6043.1528, 6043.1528] +25-08-31 06:46:28 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:46:28 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:46:28 | D | - sum error = [ 6093.2651, 6075.8249, 6081.8251, 6060.5518, 6064.9432] +25-08-31 06:46:28 | D | - best error = [ 6043.1528, 6043.1528, 6043.1528, 6043.1528, 6043.1528] +25-08-31 06:46:28 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:46:28 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:46:28 | D | - sum error = [ 6069.6527, 6059.3043, 6065.1955, 6056.2549, 6056.1380] +25-08-31 06:46:28 | D | - best error = [ 6043.1528, 6043.1528, 6043.1528, 6043.1528, 6043.1528] +25-08-31 06:46:28 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:46:28 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:46:28 | D | - sum error = [ 6072.0194, 6081.5728, 6088.7112, 6096.7719] +25-08-31 06:46:28 | D | - best error = [ 6043.1528, 6043.1528, 6043.1528, 6043.1528] +25-08-31 06:46:28 | D | + error = 6043.1528 +25-08-31 06:46:28 | D | + scale = [min=1.2549, max=2.6625] +25-08-31 06:46:28 | D | - single_transformer_blocks.7.down_proj +25-08-31 06:46:28 | D | + w: sfp4_e2m1_all +25-08-31 06:46:28 | D | + x: sfp4_e2m1_all +25-08-31 06:46:28 | D | + y: None +25-08-31 06:46:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:46:28 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:46:30 | D | + x - AbsMax +25-08-31 06:46:30 | D | + x = [min=1.2969, max=22.8750] +25-08-31 06:46:30 | D | + w - AbsMax +25-08-31 06:46:30 | D | + w = [min=0.1147, max=0.7500] +25-08-31 06:46:30 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:46:32 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:48:18 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:48:18 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:48:18 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:48:18 | D | - sum error = [ 6666.9414, 6667.5414, 6665.3036, 6656.8467, 6655.2611] +25-08-31 06:48:18 | D | - best error = [ 6666.9414, 6666.9414, 6665.3036, 6656.8467, 6655.2611] +25-08-31 06:48:18 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:48:18 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:48:18 | D | - sum error = [ 6665.3006, 6668.0588, 6683.2453, 6705.6955, 6725.0740] +25-08-31 06:48:18 | D | - best error = [ 6655.2611, 6655.2611, 6655.2611, 6655.2611, 6655.2611] +25-08-31 06:48:18 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:48:18 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:48:18 | D | - sum error = [ 6751.9111, 6775.7190, 6799.7359, 6838.4387, 6879.9206] +25-08-31 06:48:18 | D | - best error = [ 6655.2611, 6655.2611, 6655.2611, 6655.2611, 6655.2611] +25-08-31 06:48:18 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:48:18 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:48:18 | D | - sum error = [ 6915.1616, 6988.9644, 7178.4948, 7075.9825, 7133.1068] +25-08-31 06:48:18 | D | - best error = [ 6655.2611, 6655.2611, 6655.2611, 6655.2611, 6655.2611] +25-08-31 06:48:18 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:48:18 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:48:18 | D | - sum error = [ 6938.5327, 6903.2445, 6857.1735, 6834.7951, 6807.8619] +25-08-31 06:48:18 | D | - best error = [ 6655.2611, 6655.2611, 6655.2611, 6655.2611, 6655.2611] +25-08-31 06:48:18 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:48:18 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:48:18 | D | - sum error = [ 6776.6880, 6758.8434, 6734.7814, 6722.2389, 6714.3530] +25-08-31 06:48:18 | D | - best error = [ 6655.2611, 6655.2611, 6655.2611, 6655.2611, 6655.2611] +25-08-31 06:48:18 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:48:18 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:48:18 | D | - sum error = [ 6715.5068, 6739.1928, 6763.5168, 6810.2640, 6861.1941] +25-08-31 06:48:18 | D | - best error = [ 6655.2611, 6655.2611, 6655.2611, 6655.2611, 6655.2611] +25-08-31 06:48:18 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:48:18 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:48:18 | D | - sum error = [ 6920.7401, 6957.7677, 7040.1091, 7107.6730] +25-08-31 06:48:18 | D | - best error = [ 6655.2611, 6655.2611, 6655.2611, 6655.2611] +25-08-31 06:48:18 | D | + error = 6655.2611 +25-08-31 06:48:18 | D | + scale = [min=1.0534, max=1.8701] +25-08-31 06:48:36 | D | - Smoothing Diffusion Block single_transformer_blocks.8 +25-08-31 06:48:36 | D | - Skipping Module single_transformer_blocks.8.norm.linear +25-08-31 06:48:36 | D | - Smoothing Transformer Block single_transformer_blocks.8 +25-08-31 06:48:36 | D | - single_transformer_blocks.8.attn.qkv_proj + single_transformer_blocks.8.up_proj +25-08-31 06:48:36 | D | + w: sfp4_e2m1_all +25-08-31 06:48:36 | D | + x: sfp4_e2m1_all +25-08-31 06:48:36 | D | + y: None +25-08-31 06:48:36 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:48:36 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:48:36 | D | + x - AbsMax +25-08-31 06:48:36 | D | + x = [min=0.1484, max=18.2500] +25-08-31 06:48:36 | D | + w - AbsMax +25-08-31 06:48:36 | D | + w = [min=0.0786, max=0.6562] +25-08-31 06:48:36 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:48:38 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:50:45 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:50:45 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:50:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:50:45 | D | - sum error = [28567.2674, 28256.5345, 28120.1844, 28099.4793, 27857.1889] +25-08-31 06:50:45 | D | - best error = [28567.2674, 28256.5345, 28120.1844, 28099.4793, 27857.1889] +25-08-31 06:50:45 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:50:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:50:45 | D | - sum error = [27979.8693, 27811.6891, 27843.3814, 29111.5548, 27720.1570] +25-08-31 06:50:45 | D | - best error = [27857.1889, 27811.6891, 27811.6891, 27811.6891, 27720.1570] +25-08-31 06:50:45 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:50:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:50:45 | D | - sum error = [27374.4137, 27436.0009, 27271.2608, 27205.2136, 27227.5437] +25-08-31 06:50:45 | D | - best error = [27374.4137, 27374.4137, 27271.2608, 27205.2136, 27205.2136] +25-08-31 06:50:45 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:50:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:50:45 | D | - sum error = [27518.0383, 27724.9542, 27573.9297, 27955.9493, 27801.6644] +25-08-31 06:50:45 | D | - best error = [27205.2136, 27205.2136, 27205.2136, 27205.2136, 27205.2136] +25-08-31 06:50:45 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:50:45 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:50:45 | D | - sum error = [31629.3261, 30817.0602, 30284.9719, 29692.6324, 29292.5022] +25-08-31 06:50:45 | D | - best error = [27205.2136, 27205.2136, 27205.2136, 27205.2136, 27205.2136] +25-08-31 06:50:45 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:50:45 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:50:45 | D | - sum error = [29160.2612, 28695.6342, 28501.5604, 28519.7285, 28266.9107] +25-08-31 06:50:45 | D | - best error = [27205.2136, 27205.2136, 27205.2136, 27205.2136, 27205.2136] +25-08-31 06:50:45 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:50:45 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:50:45 | D | - sum error = [28405.5668, 29249.7592, 27871.5453, 27707.8080, 27766.5479] +25-08-31 06:50:45 | D | - best error = [27205.2136, 27205.2136, 27205.2136, 27205.2136, 27205.2136] +25-08-31 06:50:45 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:50:45 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:50:45 | D | - sum error = [27993.6907, 27790.3626, 27673.9647, 27802.4954] +25-08-31 06:50:45 | D | - best error = [27205.2136, 27205.2136, 27205.2136, 27205.2136] +25-08-31 06:50:45 | D | + error = 27205.2136 +25-08-31 06:50:45 | D | + scale = [min=0.2894, max=6.6042] +25-08-31 06:50:45 | D | - single_transformer_blocks.8.attn.out_proj +25-08-31 06:50:45 | D | + w: sfp4_e2m1_all +25-08-31 06:50:45 | D | + x: sfp4_e2m1_all +25-08-31 06:50:45 | D | + y: None +25-08-31 06:50:45 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:50:45 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:50:46 | D | + x - AbsMax +25-08-31 06:50:46 | D | + x = [min=1.7188, max=9.1875] +25-08-31 06:50:46 | D | + w - AbsMax +25-08-31 06:50:46 | D | + w = [min=0.1211, max=0.3164] +25-08-31 06:50:46 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:50:46 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:51:45 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:51:45 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:51:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:51:45 | D | - sum error = [ 5353.2211, 5357.0869, 5351.7956, 5346.2536, 5349.2538] +25-08-31 06:51:45 | D | - best error = [ 5353.2211, 5353.2211, 5351.7956, 5346.2536, 5346.2536] +25-08-31 06:51:45 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:51:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:51:45 | D | - sum error = [ 5348.1889, 5353.5637, 5353.3068, 5348.8051, 5372.3235] +25-08-31 06:51:45 | D | - best error = [ 5346.2536, 5346.2536, 5346.2536, 5346.2536, 5346.2536] +25-08-31 06:51:45 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:51:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:51:45 | D | - sum error = [ 5372.1592, 5375.4851, 5366.5760, 5381.2610, 5393.2914] +25-08-31 06:51:45 | D | - best error = [ 5346.2536, 5346.2536, 5346.2536, 5346.2536, 5346.2536] +25-08-31 06:51:45 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:51:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:51:45 | D | - sum error = [ 5401.1454, 5411.5682, 5415.3476, 5425.9322, 5449.3058] +25-08-31 06:51:45 | D | - best error = [ 5346.2536, 5346.2536, 5346.2536, 5346.2536, 5346.2536] +25-08-31 06:51:45 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:51:45 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:51:45 | D | - sum error = [ 5395.1712, 5379.0373, 5385.8121, 5379.5933, 5378.9657] +25-08-31 06:51:45 | D | - best error = [ 5346.2536, 5346.2536, 5346.2536, 5346.2536, 5346.2536] +25-08-31 06:51:45 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:51:45 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:51:45 | D | - sum error = [ 5372.8451, 5375.5070, 5376.1220, 5384.6276, 5383.7407] +25-08-31 06:51:45 | D | - best error = [ 5346.2536, 5346.2536, 5346.2536, 5346.2536, 5346.2536] +25-08-31 06:51:45 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:51:45 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:51:45 | D | - sum error = [ 5375.6514, 5371.8958, 5399.0842, 5400.4179, 5405.7678] +25-08-31 06:51:45 | D | - best error = [ 5346.2536, 5346.2536, 5346.2536, 5346.2536, 5346.2536] +25-08-31 06:51:45 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:51:45 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:51:45 | D | - sum error = [ 5409.0066, 5415.6320, 5433.7013, 5448.8012] +25-08-31 06:51:45 | D | - best error = [ 5346.2536, 5346.2536, 5346.2536, 5346.2536] +25-08-31 06:51:45 | D | + error = 5346.2536 +25-08-31 06:51:45 | D | + scale = [min=1.0846, max=1.3947] +25-08-31 06:51:45 | D | - single_transformer_blocks.8.down_proj +25-08-31 06:51:45 | D | + w: sfp4_e2m1_all +25-08-31 06:51:45 | D | + x: sfp4_e2m1_all +25-08-31 06:51:45 | D | + y: None +25-08-31 06:51:45 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:51:45 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:51:48 | D | + x - AbsMax +25-08-31 06:51:48 | D | + x = [min=1.6719, max=13.6875] +25-08-31 06:51:48 | D | + w - AbsMax +25-08-31 06:51:48 | D | + w = [min=0.1143, max=1.5547] +25-08-31 06:51:48 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:51:50 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:53:53 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:53:53 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:53:53 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:53:53 | D | - sum error = [ 6282.7994, 6297.3404, 6287.2515, 6294.3089, 6286.2125] +25-08-31 06:53:53 | D | - best error = [ 6282.7994, 6282.7994, 6282.7994, 6282.7994, 6282.7994] +25-08-31 06:53:53 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:53:53 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:53:53 | D | - sum error = [ 6560.0829, 6299.8629, 6311.5659, 6325.3702, 6344.3509] +25-08-31 06:53:53 | D | - best error = [ 6282.7994, 6282.7994, 6282.7994, 6282.7994, 6282.7994] +25-08-31 06:53:53 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:53:53 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:53:53 | D | - sum error = [ 6372.3390, 6375.6361, 6408.9603, 6429.7387, 6457.0582] +25-08-31 06:53:53 | D | - best error = [ 6282.7994, 6282.7994, 6282.7994, 6282.7994, 6282.7994] +25-08-31 06:53:53 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:53:53 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:53:53 | D | - sum error = [ 6510.7941, 6537.3236, 6570.5896, 6608.8604, 6659.8805] +25-08-31 06:53:53 | D | - best error = [ 6282.7994, 6282.7994, 6282.7994, 6282.7994, 6282.7994] +25-08-31 06:53:53 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:53:53 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:53:53 | D | - sum error = [ 6682.2972, 6618.0130, 6579.4702, 6505.3805, 6484.8695] +25-08-31 06:53:53 | D | - best error = [ 6282.7994, 6282.7994, 6282.7994, 6282.7994, 6282.7994] +25-08-31 06:53:53 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:53:53 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:53:53 | D | - sum error = [ 6453.0600, 6424.3997, 6402.2821, 6362.1842, 6370.2672] +25-08-31 06:53:53 | D | - best error = [ 6282.7994, 6282.7994, 6282.7994, 6282.7994, 6282.7994] +25-08-31 06:53:53 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:53:53 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:53:53 | D | - sum error = [ 6362.5841, 6378.1552, 6401.9713, 6409.1272, 6456.2900] +25-08-31 06:53:53 | D | - best error = [ 6282.7994, 6282.7994, 6282.7994, 6282.7994, 6282.7994] +25-08-31 06:53:53 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:53:53 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:53:53 | D | - sum error = [ 6485.0776, 6538.1860, 6586.9216, 6639.1255] +25-08-31 06:53:53 | D | - best error = [ 6282.7994, 6282.7994, 6282.7994, 6282.7994] +25-08-31 06:53:53 | D | + error = 6282.7994 +25-08-31 06:53:53 | D | + scale = [min=1.0000, max=1.0000] +25-08-31 06:54:11 | D | - Smoothing Diffusion Block single_transformer_blocks.9 +25-08-31 06:54:11 | D | - Skipping Module single_transformer_blocks.9.norm.linear +25-08-31 06:54:11 | D | - Smoothing Transformer Block single_transformer_blocks.9 +25-08-31 06:54:11 | D | - single_transformer_blocks.9.attn.qkv_proj + single_transformer_blocks.9.up_proj +25-08-31 06:54:11 | D | + w: sfp4_e2m1_all +25-08-31 06:54:11 | D | + x: sfp4_e2m1_all +25-08-31 06:54:11 | D | + y: None +25-08-31 06:54:11 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:54:11 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:54:12 | D | + x - AbsMax +25-08-31 06:54:12 | D | + x = [min=0.1738, max=19.8750] +25-08-31 06:54:12 | D | + w - AbsMax +25-08-31 06:54:12 | D | + w = [min=0.0752, max=0.8906] +25-08-31 06:54:12 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:54:13 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:56:23 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:56:23 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:56:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:56:23 | D | - sum error = [32943.9087, 32895.6984, 31745.3226, 31680.9172, 31411.1155] +25-08-31 06:56:23 | D | - best error = [32943.9087, 32895.6984, 31745.3226, 31680.9172, 31411.1155] +25-08-31 06:56:23 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:56:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:56:23 | D | - sum error = [31505.1234, 31137.7297, 30863.5417, 30426.9417, 30519.5072] +25-08-31 06:56:23 | D | - best error = [31411.1155, 31137.7297, 30863.5417, 30426.9417, 30426.9417] +25-08-31 06:56:23 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:56:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:56:23 | D | - sum error = [30497.6516, 30484.1110, 30378.9885, 31176.9104, 30831.6502] +25-08-31 06:56:23 | D | - best error = [30426.9417, 30426.9417, 30378.9885, 30378.9885, 30378.9885] +25-08-31 06:56:23 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:56:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:56:23 | D | - sum error = [30962.5828, 31173.0961, 30709.2314, 31088.0885, 31742.0322] +25-08-31 06:56:23 | D | - best error = [30378.9885, 30378.9885, 30378.9885, 30378.9885, 30378.9885] +25-08-31 06:56:23 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:56:23 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:56:23 | D | - sum error = [34039.3504, 33908.0677, 33012.4030, 33711.3659, 32629.6660] +25-08-31 06:56:23 | D | - best error = [30378.9885, 30378.9885, 30378.9885, 30378.9885, 30378.9885] +25-08-31 06:56:23 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:56:23 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:56:23 | D | - sum error = [31986.6172, 31901.3881, 32080.8738, 31179.8150, 31182.1847] +25-08-31 06:56:23 | D | - best error = [30378.9885, 30378.9885, 30378.9885, 30378.9885, 30378.9885] +25-08-31 06:56:23 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:56:23 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:56:23 | D | - sum error = [30876.3936, 30799.2509, 30810.8158, 31243.9393, 30885.9010] +25-08-31 06:56:23 | D | - best error = [30378.9885, 30378.9885, 30378.9885, 30378.9885, 30378.9885] +25-08-31 06:56:23 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:56:23 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:56:23 | D | - sum error = [31007.8173, 30969.7680, 31240.2028, 31675.9937] +25-08-31 06:56:23 | D | - best error = [30378.9885, 30378.9885, 30378.9885, 30378.9885] +25-08-31 06:56:23 | D | + error = 30378.9885 +25-08-31 06:56:23 | D | + scale = [min=0.3500, max=6.0115] +25-08-31 06:56:23 | D | - single_transformer_blocks.9.attn.out_proj +25-08-31 06:56:23 | D | + w: sfp4_e2m1_all +25-08-31 06:56:23 | D | + x: sfp4_e2m1_all +25-08-31 06:56:23 | D | + y: None +25-08-31 06:56:23 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:56:23 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:56:24 | D | + x - AbsMax +25-08-31 06:56:24 | D | + x = [min=1.9375, max=10.2500] +25-08-31 06:56:24 | D | + w - AbsMax +25-08-31 06:56:24 | D | + w = [min=0.1226, max=0.4766] +25-08-31 06:56:24 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:56:25 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:57:24 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:57:24 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:57:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:57:24 | D | - sum error = [ 6135.5678, 6129.2241, 6127.8796, 6129.7745, 6125.2540] +25-08-31 06:57:24 | D | - best error = [ 6135.5678, 6129.2241, 6127.8796, 6127.8796, 6125.2540] +25-08-31 06:57:24 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:57:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:57:24 | D | - sum error = [ 6107.8366, 6119.9670, 6118.9787, 6135.4345, 6123.7358] +25-08-31 06:57:24 | D | - best error = [ 6107.8366, 6107.8366, 6107.8366, 6107.8366, 6107.8366] +25-08-31 06:57:24 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:57:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:57:24 | D | - sum error = [ 6133.5054, 6131.4791, 6149.9880, 6146.7611, 6171.4805] +25-08-31 06:57:24 | D | - best error = [ 6107.8366, 6107.8366, 6107.8366, 6107.8366, 6107.8366] +25-08-31 06:57:24 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:57:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:57:24 | D | - sum error = [ 6160.5250, 6179.5084, 6196.0626, 6207.8350, 6217.1312] +25-08-31 06:57:24 | D | - best error = [ 6107.8366, 6107.8366, 6107.8366, 6107.8366, 6107.8366] +25-08-31 06:57:24 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:57:24 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:57:24 | D | - sum error = [ 6164.2197, 6166.4333, 6153.1101, 6157.4336, 6158.8996] +25-08-31 06:57:24 | D | - best error = [ 6107.8366, 6107.8366, 6107.8366, 6107.8366, 6107.8366] +25-08-31 06:57:24 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:57:24 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:57:24 | D | - sum error = [ 6141.3881, 6152.4688, 6148.0758, 6138.1922, 6148.8230] +25-08-31 06:57:24 | D | - best error = [ 6107.8366, 6107.8366, 6107.8366, 6107.8366, 6107.8366] +25-08-31 06:57:24 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:57:24 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:57:24 | D | - sum error = [ 6145.4741, 6147.5784, 6148.8275, 6176.1924, 6169.0051] +25-08-31 06:57:24 | D | - best error = [ 6107.8366, 6107.8366, 6107.8366, 6107.8366, 6107.8366] +25-08-31 06:57:24 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:57:24 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:57:24 | D | - sum error = [ 6180.9524, 6186.5260, 6206.3732, 6214.5867] +25-08-31 06:57:24 | D | - best error = [ 6107.8366, 6107.8366, 6107.8366, 6107.8366] +25-08-31 06:57:24 | D | + error = 6107.8366 +25-08-31 06:57:24 | D | + scale = [min=1.1798, max=1.7893] +25-08-31 06:57:25 | D | - single_transformer_blocks.9.down_proj +25-08-31 06:57:25 | D | + w: sfp4_e2m1_all +25-08-31 06:57:25 | D | + x: sfp4_e2m1_all +25-08-31 06:57:25 | D | + y: None +25-08-31 06:57:25 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:57:25 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:57:28 | D | + x - AbsMax +25-08-31 06:57:28 | D | + x = [min=0.5586, max=18.2500] +25-08-31 06:57:28 | D | + w - AbsMax +25-08-31 06:57:28 | D | + w = [min=0.1191, max=1.2344] +25-08-31 06:57:28 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:57:30 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 06:59:30 | D | - x / w range = AbsMax / AbsMax +25-08-31 06:59:30 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 06:59:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:59:30 | D | - sum error = [ 6062.4312, 6078.7844, 6068.3012, 6064.9791, 6074.0217] +25-08-31 06:59:30 | D | - best error = [ 6062.4312, 6062.4312, 6062.4312, 6062.4312, 6062.4312] +25-08-31 06:59:30 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 06:59:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:59:30 | D | - sum error = [ 6096.5134, 6100.5203, 6109.2081, 6117.5198, 6142.6607] +25-08-31 06:59:30 | D | - best error = [ 6062.4312, 6062.4312, 6062.4312, 6062.4312, 6062.4312] +25-08-31 06:59:30 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 06:59:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:59:30 | D | - sum error = [ 6161.5092, 6177.6695, 6187.1548, 6212.5376, 6239.4747] +25-08-31 06:59:30 | D | - best error = [ 6062.4312, 6062.4312, 6062.4312, 6062.4312, 6062.4312] +25-08-31 06:59:30 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:59:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 06:59:30 | D | - sum error = [ 6269.8922, 6301.7169, 6323.9639, 6371.5063, 6410.8371] +25-08-31 06:59:30 | D | - best error = [ 6062.4312, 6062.4312, 6062.4312, 6062.4312, 6062.4312] +25-08-31 06:59:30 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 06:59:30 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 06:59:30 | D | - sum error = [ 6295.7208, 6263.8372, 6238.4270, 6208.9722, 6201.8733] +25-08-31 06:59:30 | D | - best error = [ 6062.4312, 6062.4312, 6062.4312, 6062.4312, 6062.4312] +25-08-31 06:59:30 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 06:59:30 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 06:59:30 | D | - sum error = [ 6180.2181, 6162.7526, 6151.8761, 6117.1552, 6138.9686] +25-08-31 06:59:30 | D | - best error = [ 6062.4312, 6062.4312, 6062.4312, 6062.4312, 6062.4312] +25-08-31 06:59:30 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 06:59:30 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 06:59:30 | D | - sum error = [ 6142.9517, 6152.2838, 6175.7824, 6203.0009, 6230.1788] +25-08-31 06:59:30 | D | - best error = [ 6062.4312, 6062.4312, 6062.4312, 6062.4312, 6062.4312] +25-08-31 06:59:30 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 06:59:30 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 06:59:30 | D | - sum error = [ 6525.1434, 6296.5032, 6347.6118, 6389.9349] +25-08-31 06:59:30 | D | - best error = [ 6062.4312, 6062.4312, 6062.4312, 6062.4312] +25-08-31 06:59:30 | D | + error = 6062.4312 +25-08-31 06:59:30 | D | + scale = [min=1.0000, max=1.0000] +25-08-31 06:59:49 | D | - Smoothing Diffusion Block single_transformer_blocks.10 +25-08-31 06:59:49 | D | - Skipping Module single_transformer_blocks.10.norm.linear +25-08-31 06:59:49 | D | - Smoothing Transformer Block single_transformer_blocks.10 +25-08-31 06:59:49 | D | - single_transformer_blocks.10.attn.qkv_proj + single_transformer_blocks.10.up_proj +25-08-31 06:59:49 | D | + w: sfp4_e2m1_all +25-08-31 06:59:49 | D | + x: sfp4_e2m1_all +25-08-31 06:59:49 | D | + y: None +25-08-31 06:59:49 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 06:59:49 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 06:59:49 | D | + x - AbsMax +25-08-31 06:59:49 | D | + x = [min=0.1221, max=16.5000] +25-08-31 06:59:49 | D | + w - AbsMax +25-08-31 06:59:49 | D | + w = [min=0.0840, max=0.6523] +25-08-31 06:59:49 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 06:59:50 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 07:01:57 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:01:57 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:01:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:01:57 | D | - sum error = [37515.8463, 36839.8046, 36580.5666, 36318.5691, 36035.9465] +25-08-31 07:01:57 | D | - best error = [37515.8463, 36839.8046, 36580.5666, 36318.5691, 36035.9465] +25-08-31 07:01:57 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:01:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:01:57 | D | - sum error = [36212.2013, 35484.6996, 35143.5110, 35181.9772, 35112.8485] +25-08-31 07:01:57 | D | - best error = [36035.9465, 35484.6996, 35143.5110, 35143.5110, 35112.8485] +25-08-31 07:01:57 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:01:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:01:57 | D | - sum error = [35041.1415, 34927.1333, 34791.2154, 34797.8887, 35012.4019] +25-08-31 07:01:57 | D | - best error = [35041.1415, 34927.1333, 34791.2154, 34791.2154, 34791.2154] +25-08-31 07:01:57 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:01:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:01:57 | D | - sum error = [35293.7189, 34968.8886, 35417.0074, 34888.4247, 35107.3162] +25-08-31 07:01:57 | D | - best error = [34791.2154, 34791.2154, 34791.2154, 34791.2154, 34791.2154] +25-08-31 07:01:57 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:01:57 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:01:57 | D | - sum error = [43114.4011, 42634.6728, 40969.0349, 40119.8253, 38786.8761] +25-08-31 07:01:57 | D | - best error = [34791.2154, 34791.2154, 34791.2154, 34791.2154, 34791.2154] +25-08-31 07:01:57 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:01:57 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:01:57 | D | - sum error = [37916.3593, 37360.3957, 37416.2506, 36786.0304, 36177.0615] +25-08-31 07:01:57 | D | - best error = [34791.2154, 34791.2154, 34791.2154, 34791.2154, 34791.2154] +25-08-31 07:01:57 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:01:57 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:01:57 | D | - sum error = [35829.8093, 35525.1113, 35340.2750, 35198.5945, 35519.6252] +25-08-31 07:01:57 | D | - best error = [34791.2154, 34791.2154, 34791.2154, 34791.2154, 34791.2154] +25-08-31 07:01:57 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:01:57 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:01:57 | D | - sum error = [35151.5776, 35038.2210, 35337.0496, 35123.9706] +25-08-31 07:01:57 | D | - best error = [34791.2154, 34791.2154, 34791.2154, 34791.2154] +25-08-31 07:01:57 | D | + error = 34791.2154 +25-08-31 07:01:57 | D | + scale = [min=0.2831, max=5.3764] +25-08-31 07:01:57 | D | - single_transformer_blocks.10.attn.out_proj +25-08-31 07:01:57 | D | + w: sfp4_e2m1_all +25-08-31 07:01:57 | D | + x: sfp4_e2m1_all +25-08-31 07:01:57 | D | + y: None +25-08-31 07:01:57 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:01:57 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:01:58 | D | + x - AbsMax +25-08-31 07:01:58 | D | + x = [min=1.5781, max=9.5000] +25-08-31 07:01:58 | D | + w - AbsMax +25-08-31 07:01:58 | D | + w = [min=0.1177, max=0.3691] +25-08-31 07:01:58 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 07:01:58 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 07:02:55 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:02:55 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:02:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:02:55 | D | - sum error = [ 5742.1891, 5739.5397, 5738.6132, 5728.3162, 5731.6460] +25-08-31 07:02:55 | D | - best error = [ 5742.1891, 5739.5397, 5738.6132, 5728.3162, 5728.3162] +25-08-31 07:02:55 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:02:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:02:55 | D | - sum error = [ 5730.4775, 5733.8259, 5742.6013, 5738.8940, 5718.6759] +25-08-31 07:02:55 | D | - best error = [ 5728.3162, 5728.3162, 5728.3162, 5728.3162, 5718.6759] +25-08-31 07:02:55 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:02:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:02:55 | D | - sum error = [ 5734.4618, 5734.9295, 5750.3541, 5744.5256, 5752.1201] +25-08-31 07:02:55 | D | - best error = [ 5718.6759, 5718.6759, 5718.6759, 5718.6759, 5718.6759] +25-08-31 07:02:55 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:02:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:02:55 | D | - sum error = [ 5758.5220, 5777.1820, 5773.2616, 5781.6088, 5795.2360] +25-08-31 07:02:55 | D | - best error = [ 5718.6759, 5718.6759, 5718.6759, 5718.6759, 5718.6759] +25-08-31 07:02:55 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:02:55 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:02:55 | D | - sum error = [ 5800.8703, 5799.3793, 5782.8125, 5773.0624, 5762.5697] +25-08-31 07:02:55 | D | - best error = [ 5718.6759, 5718.6759, 5718.6759, 5718.6759, 5718.6759] +25-08-31 07:02:55 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:02:55 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:02:55 | D | - sum error = [ 5757.3379, 5754.5443, 5755.2331, 5760.6609, 5741.4075] +25-08-31 07:02:55 | D | - best error = [ 5718.6759, 5718.6759, 5718.6759, 5718.6759, 5718.6759] +25-08-31 07:02:55 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:02:55 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:02:55 | D | - sum error = [ 5752.5796, 5749.5676, 5754.0779, 5749.4271, 5753.3307] +25-08-31 07:02:55 | D | - best error = [ 5718.6759, 5718.6759, 5718.6759, 5718.6759, 5718.6759] +25-08-31 07:02:55 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:02:55 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:02:55 | D | - sum error = [ 5757.4762, 5772.9339, 5788.2146, 5800.2734] +25-08-31 07:02:55 | D | - best error = [ 5718.6759, 5718.6759, 5718.6759, 5718.6759] +25-08-31 07:02:55 | D | + error = 5718.6759 +25-08-31 07:02:55 | D | + scale = [min=1.2279, max=2.7541] +25-08-31 07:02:56 | D | - single_transformer_blocks.10.down_proj +25-08-31 07:02:56 | D | + w: sfp4_e2m1_all +25-08-31 07:02:56 | D | + x: sfp4_e2m1_all +25-08-31 07:02:56 | D | + y: None +25-08-31 07:02:56 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:02:56 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:02:58 | D | + x - AbsMax +25-08-31 07:02:58 | D | + x = [min=1.2109, max=13.4375] +25-08-31 07:02:58 | D | + w - AbsMax +25-08-31 07:02:58 | D | + w = [min=0.1187, max=1.1094] +25-08-31 07:02:58 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 07:03:02 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 07:04:56 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:04:56 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:04:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:04:56 | D | - sum error = [ 6578.8838, 6635.5048, 6593.8168, 6583.6737, 6584.4733] +25-08-31 07:04:56 | D | - best error = [ 6578.8838, 6578.8838, 6578.8838, 6578.8838, 6578.8838] +25-08-31 07:04:56 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:04:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:04:56 | D | - sum error = [ 6603.4579, 6606.5204, 6631.4014, 6631.6019, 6635.1310] +25-08-31 07:04:56 | D | - best error = [ 6578.8838, 6578.8838, 6578.8838, 6578.8838, 6578.8838] +25-08-31 07:04:56 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:04:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:04:56 | D | - sum error = [ 6655.5514, 6677.2830, 6702.2104, 6724.1529, 6747.2883] +25-08-31 07:04:56 | D | - best error = [ 6578.8838, 6578.8838, 6578.8838, 6578.8838, 6578.8838] +25-08-31 07:04:56 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:04:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:04:56 | D | - sum error = [ 6795.0261, 6806.7234, 6842.1687, 6890.6757, 6926.7762] +25-08-31 07:04:56 | D | - best error = [ 6578.8838, 6578.8838, 6578.8838, 6578.8838, 6578.8838] +25-08-31 07:04:56 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:04:56 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:04:56 | D | - sum error = [ 7056.2262, 6986.6186, 6934.8961, 6883.3469, 6834.1852] +25-08-31 07:04:56 | D | - best error = [ 6578.8838, 6578.8838, 6578.8838, 6578.8838, 6578.8838] +25-08-31 07:04:56 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:04:56 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:04:56 | D | - sum error = [ 6787.4817, 6750.8017, 6716.2758, 6698.6670, 6688.9153] +25-08-31 07:04:56 | D | - best error = [ 6578.8838, 6578.8838, 6578.8838, 6578.8838, 6578.8838] +25-08-31 07:04:56 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:04:56 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:04:56 | D | - sum error = [ 6686.4574, 6683.2453, 6708.8455, 6938.1623, 6738.0947] +25-08-31 07:04:56 | D | - best error = [ 6578.8838, 6578.8838, 6578.8838, 6578.8838, 6578.8838] +25-08-31 07:04:56 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:04:56 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:04:56 | D | - sum error = [ 6772.7091, 6813.8309, 6852.6322, 6907.3627] +25-08-31 07:04:56 | D | - best error = [ 6578.8838, 6578.8838, 6578.8838, 6578.8838] +25-08-31 07:04:56 | D | + error = 6578.8838 +25-08-31 07:04:56 | D | + scale = [min=1.0000, max=1.0000] +25-08-31 07:05:15 | D | - Smoothing Diffusion Block single_transformer_blocks.11 +25-08-31 07:05:15 | D | - Skipping Module single_transformer_blocks.11.norm.linear +25-08-31 07:05:15 | D | - Smoothing Transformer Block single_transformer_blocks.11 +25-08-31 07:05:15 | D | - single_transformer_blocks.11.attn.qkv_proj + single_transformer_blocks.11.up_proj +25-08-31 07:05:15 | D | + w: sfp4_e2m1_all +25-08-31 07:05:15 | D | + x: sfp4_e2m1_all +25-08-31 07:05:15 | D | + y: None +25-08-31 07:05:15 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:05:15 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:05:15 | D | + x - AbsMax +25-08-31 07:05:15 | D | + x = [min=0.1543, max=21.7500] +25-08-31 07:05:16 | D | + w - AbsMax +25-08-31 07:05:16 | D | + w = [min=0.0679, max=0.6641] +25-08-31 07:05:16 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 07:05:17 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 07:07:24 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:07:24 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:07:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:07:24 | D | - sum error = [40670.6168, 40341.0263, 40102.0394, 39861.9393, 39649.0169] +25-08-31 07:07:24 | D | - best error = [40670.6168, 40341.0263, 40102.0394, 39861.9393, 39649.0169] +25-08-31 07:07:24 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:07:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:07:24 | D | - sum error = [39359.6016, 39541.8741, 38937.3224, 38950.4875, 38955.9127] +25-08-31 07:07:24 | D | - best error = [39359.6016, 39359.6016, 38937.3224, 38937.3224, 38937.3224] +25-08-31 07:07:24 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:07:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:07:24 | D | - sum error = [38712.9597, 38921.1371, 38761.8435, 38845.4513, 38820.4037] +25-08-31 07:07:24 | D | - best error = [38712.9597, 38712.9597, 38712.9597, 38712.9597, 38712.9597] +25-08-31 07:07:24 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:07:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:07:24 | D | - sum error = [38803.1801, 38938.5206, 38896.5759, 39234.2397, 39072.9353] +25-08-31 07:07:24 | D | - best error = [38712.9597, 38712.9597, 38712.9597, 38712.9597, 38712.9597] +25-08-31 07:07:24 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:07:24 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:07:24 | D | - sum error = [43813.6870, 42964.1383, 42380.5737, 41768.9431, 41533.8613] +25-08-31 07:07:24 | D | - best error = [38712.9597, 38712.9597, 38712.9597, 38712.9597, 38712.9597] +25-08-31 07:07:24 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:07:24 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:07:24 | D | - sum error = [40692.9655, 40723.2675, 40163.8197, 39868.9212, 39733.0371] +25-08-31 07:07:24 | D | - best error = [38712.9597, 38712.9597, 38712.9597, 38712.9597, 38712.9597] +25-08-31 07:07:24 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:07:24 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:07:24 | D | - sum error = [39285.0001, 39269.9930, 39194.5825, 38952.4789, 39031.3663] +25-08-31 07:07:24 | D | - best error = [38712.9597, 38712.9597, 38712.9597, 38712.9597, 38712.9597] +25-08-31 07:07:24 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:07:24 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:07:24 | D | - sum error = [38908.6035, 38959.6731, 39295.3990, 39107.0212] +25-08-31 07:07:24 | D | - best error = [38712.9597, 38712.9597, 38712.9597, 38712.9597] +25-08-31 07:07:24 | D | + error = 38712.9597 +25-08-31 07:07:24 | D | + scale = [min=0.3928, max=4.6637] +25-08-31 07:07:24 | D | - single_transformer_blocks.11.attn.out_proj +25-08-31 07:07:24 | D | + w: sfp4_e2m1_all +25-08-31 07:07:24 | D | + x: sfp4_e2m1_all +25-08-31 07:07:24 | D | + y: None +25-08-31 07:07:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:07:24 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:07:25 | D | + x - AbsMax +25-08-31 07:07:25 | D | + x = [min=1.3125, max=8.4375] +25-08-31 07:07:25 | D | + w - AbsMax +25-08-31 07:07:25 | D | + w = [min=0.1260, max=0.4004] +25-08-31 07:07:25 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 07:07:25 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 07:08:24 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:08:24 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:08:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:08:24 | D | - sum error = [ 5583.0085, 5591.6697, 5588.7553, 5588.2623, 5588.7473] +25-08-31 07:08:24 | D | - best error = [ 5583.0085, 5583.0085, 5583.0085, 5583.0085, 5583.0085] +25-08-31 07:08:24 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:08:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:08:24 | D | - sum error = [ 5584.8626, 5590.2855, 5574.3667, 5577.9278, 5592.3331] +25-08-31 07:08:24 | D | - best error = [ 5583.0085, 5583.0085, 5574.3667, 5574.3667, 5574.3667] +25-08-31 07:08:24 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:08:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:08:24 | D | - sum error = [ 5583.4344, 5589.5587, 5594.3139, 5606.9619, 5616.6549] +25-08-31 07:08:24 | D | - best error = [ 5574.3667, 5574.3667, 5574.3667, 5574.3667, 5574.3667] +25-08-31 07:08:24 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:08:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:08:24 | D | - sum error = [ 5624.2975, 5618.5781, 5619.5970, 5630.0334, 5645.4360] +25-08-31 07:08:24 | D | - best error = [ 5574.3667, 5574.3667, 5574.3667, 5574.3667, 5574.3667] +25-08-31 07:08:24 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:08:24 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:08:24 | D | - sum error = [ 5669.4845, 5650.9419, 5642.6329, 5638.1139, 5647.7989] +25-08-31 07:08:24 | D | - best error = [ 5574.3667, 5574.3667, 5574.3667, 5574.3667, 5574.3667] +25-08-31 07:08:24 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:08:24 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:08:24 | D | - sum error = [ 5619.0382, 5626.2778, 5617.9706, 5615.2407, 5603.5869] +25-08-31 07:08:24 | D | - best error = [ 5574.3667, 5574.3667, 5574.3667, 5574.3667, 5574.3667] +25-08-31 07:08:24 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:08:24 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:08:24 | D | - sum error = [ 5609.7438, 5618.6600, 5605.0797, 5618.7912, 5599.9675] +25-08-31 07:08:24 | D | - best error = [ 5574.3667, 5574.3667, 5574.3667, 5574.3667, 5574.3667] +25-08-31 07:08:24 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:08:24 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:08:24 | D | - sum error = [ 5611.4980, 5624.1785, 5627.9993, 5640.3355] +25-08-31 07:08:24 | D | - best error = [ 5574.3667, 5574.3667, 5574.3667, 5574.3667] +25-08-31 07:08:24 | D | + error = 5574.3667 +25-08-31 07:08:24 | D | + scale = [min=1.0999, max=2.1095] +25-08-31 07:08:24 | D | - single_transformer_blocks.11.down_proj +25-08-31 07:08:24 | D | + w: sfp4_e2m1_all +25-08-31 07:08:24 | D | + x: sfp4_e2m1_all +25-08-31 07:08:24 | D | + y: None +25-08-31 07:08:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:08:24 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:08:26 | D | + x - AbsMax +25-08-31 07:08:26 | D | + x = [min=1.6641, max=13.3750] +25-08-31 07:08:26 | D | + w - AbsMax +25-08-31 07:08:26 | D | + w = [min=0.1143, max=1.0469] +25-08-31 07:08:26 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 07:08:29 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 07:10:21 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:10:21 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:10:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:10:21 | D | - sum error = [ 6829.8729, 6814.0743, 6811.6303, 6793.7807, 6793.2879] +25-08-31 07:10:21 | D | - best error = [ 6829.8729, 6814.0743, 6811.6303, 6793.7807, 6793.2879] +25-08-31 07:10:21 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:10:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:10:21 | D | - sum error = [ 6770.8617, 6794.0024, 6781.0625, 6794.2225, 6794.1457] +25-08-31 07:10:21 | D | - best error = [ 6770.8617, 6770.8617, 6770.8617, 6770.8617, 6770.8617] +25-08-31 07:10:21 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:10:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:10:21 | D | - sum error = [ 6800.3321, 6817.1692, 6835.5977, 6846.5889, 6879.8000] +25-08-31 07:10:21 | D | - best error = [ 6770.8617, 6770.8617, 6770.8617, 6770.8617, 6770.8617] +25-08-31 07:10:21 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:10:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:10:21 | D | - sum error = [ 6910.8819, 6933.7851, 6978.2643, 7073.8453, 7059.3167] +25-08-31 07:10:21 | D | - best error = [ 6770.8617, 6770.8617, 6770.8617, 6770.8617, 6770.8617] +25-08-31 07:10:21 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:10:21 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:10:21 | D | - sum error = [ 7369.3723, 7285.3964, 7201.8645, 7131.9223, 7066.1925] +25-08-31 07:10:21 | D | - best error = [ 6770.8617, 6770.8617, 6770.8617, 6770.8617, 6770.8617] +25-08-31 07:10:21 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:10:21 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:10:21 | D | - sum error = [ 6992.6324, 6959.9078, 6893.2024, 6877.0450, 6852.5703] +25-08-31 07:10:21 | D | - best error = [ 6770.8617, 6770.8617, 6770.8617, 6770.8617, 6770.8617] +25-08-31 07:10:21 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:10:21 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:10:21 | D | - sum error = [ 6850.7681, 6838.0781, 6853.7695, 6852.4904, 6867.9423] +25-08-31 07:10:21 | D | - best error = [ 6770.8617, 6770.8617, 6770.8617, 6770.8617, 6770.8617] +25-08-31 07:10:21 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:10:21 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:10:21 | D | - sum error = [ 6909.5690, 6984.2151, 6982.7084, 7044.6067] +25-08-31 07:10:21 | D | - best error = [ 6770.8617, 6770.8617, 6770.8617, 6770.8617] +25-08-31 07:10:21 | D | + error = 6770.8617 +25-08-31 07:10:21 | D | + scale = [min=1.1358, max=1.9124] +25-08-31 07:10:40 | D | - Smoothing Diffusion Block single_transformer_blocks.12 +25-08-31 07:10:40 | D | - Skipping Module single_transformer_blocks.12.norm.linear +25-08-31 07:10:40 | D | - Smoothing Transformer Block single_transformer_blocks.12 +25-08-31 07:10:40 | D | - single_transformer_blocks.12.attn.qkv_proj + single_transformer_blocks.12.up_proj +25-08-31 07:10:40 | D | + w: sfp4_e2m1_all +25-08-31 07:10:40 | D | + x: sfp4_e2m1_all +25-08-31 07:10:40 | D | + y: None +25-08-31 07:10:40 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:10:40 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:10:40 | D | + x - AbsMax +25-08-31 07:10:40 | D | + x = [min=0.1387, max=22.0000] +25-08-31 07:10:40 | D | + w - AbsMax +25-08-31 07:10:40 | D | + w = [min=0.0728, max=0.8086] +25-08-31 07:10:40 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 07:10:42 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 07:12:50 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:12:50 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:12:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:12:50 | D | - sum error = [43550.1675, 43282.6040, 42935.9754, 42263.9705, 42300.9046] +25-08-31 07:12:50 | D | - best error = [43550.1675, 43282.6040, 42935.9754, 42263.9705, 42263.9705] +25-08-31 07:12:50 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:12:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:12:50 | D | - sum error = [41913.1475, 41827.6066, 41664.1106, 41621.7912, 41316.8471] +25-08-31 07:12:50 | D | - best error = [41913.1475, 41827.6066, 41664.1106, 41621.7912, 41316.8471] +25-08-31 07:12:50 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:12:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:12:50 | D | - sum error = [41388.2580, 41452.8536, 41258.0841, 41128.8769, 41169.3214] +25-08-31 07:12:50 | D | - best error = [41316.8471, 41316.8471, 41258.0841, 41128.8769, 41128.8769] +25-08-31 07:12:50 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:12:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:12:50 | D | - sum error = [41201.9262, 41125.7630, 41216.0000, 41363.0900, 41381.7660] +25-08-31 07:12:50 | D | - best error = [41128.8769, 41125.7630, 41125.7630, 41125.7630, 41125.7630] +25-08-31 07:12:50 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:12:50 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:12:50 | D | - sum error = [47602.4900, 46504.6379, 45541.1705, 44793.3767, 44308.6816] +25-08-31 07:12:50 | D | - best error = [41125.7630, 41125.7630, 41125.7630, 41125.7630, 41125.7630] +25-08-31 07:12:50 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:12:50 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:12:50 | D | - sum error = [43773.9678, 43468.2192, 42939.0295, 42439.5167, 42033.4297] +25-08-31 07:12:50 | D | - best error = [41125.7630, 41125.7630, 41125.7630, 41125.7630, 41125.7630] +25-08-31 07:12:50 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:12:50 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:12:50 | D | - sum error = [41761.9003, 41590.3173, 41251.8121, 41396.8177, 41305.7176] +25-08-31 07:12:50 | D | - best error = [41125.7630, 41125.7630, 41125.7630, 41125.7630, 41125.7630] +25-08-31 07:12:50 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:12:50 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:12:50 | D | - sum error = [41169.8251, 41263.3982, 41600.6400, 41418.8425] +25-08-31 07:12:50 | D | - best error = [41125.7630, 41125.7630, 41125.7630, 41125.7630] +25-08-31 07:12:50 | D | + error = 41125.7630 +25-08-31 07:12:50 | D | + scale = [min=0.2059, max=11.8560] +25-08-31 07:12:50 | D | - single_transformer_blocks.12.attn.out_proj +25-08-31 07:12:50 | D | + w: sfp4_e2m1_all +25-08-31 07:12:50 | D | + x: sfp4_e2m1_all +25-08-31 07:12:50 | D | + y: None +25-08-31 07:12:50 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:12:50 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:12:50 | D | + x - AbsMax +25-08-31 07:12:50 | D | + x = [min=1.5156, max=7.2188] +25-08-31 07:12:50 | D | + w - AbsMax +25-08-31 07:12:50 | D | + w = [min=0.1187, max=0.3809] +25-08-31 07:12:50 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 07:12:51 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 07:13:50 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:13:50 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:13:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:13:50 | D | - sum error = [ 5230.6543, 5227.9830, 5216.5793, 5226.6318, 5218.6581] +25-08-31 07:13:50 | D | - best error = [ 5230.6543, 5227.9830, 5216.5793, 5216.5793, 5216.5793] +25-08-31 07:13:50 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:13:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:13:50 | D | - sum error = [ 5222.8513, 5217.8425, 5218.6195, 5211.9006, 5224.7905] +25-08-31 07:13:50 | D | - best error = [ 5216.5793, 5216.5793, 5216.5793, 5211.9006, 5211.9006] +25-08-31 07:13:50 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:13:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:13:50 | D | - sum error = [ 5234.3076, 5233.8687, 5229.5591, 5223.0003, 5231.4468] +25-08-31 07:13:50 | D | - best error = [ 5211.9006, 5211.9006, 5211.9006, 5211.9006, 5211.9006] +25-08-31 07:13:50 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:13:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:13:50 | D | - sum error = [ 5233.2304, 5243.4004, 5248.9368, 5256.4239, 5267.7211] +25-08-31 07:13:50 | D | - best error = [ 5211.9006, 5211.9006, 5211.9006, 5211.9006, 5211.9006] +25-08-31 07:13:50 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:13:50 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:13:50 | D | - sum error = [ 5310.4651, 5293.8432, 5275.1692, 5276.0267, 5262.4781] +25-08-31 07:13:50 | D | - best error = [ 5211.9006, 5211.9006, 5211.9006, 5211.9006, 5211.9006] +25-08-31 07:13:50 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:13:50 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:13:50 | D | - sum error = [ 5263.2427, 5254.1112, 5254.4358, 5260.5777, 5255.1908] +25-08-31 07:13:50 | D | - best error = [ 5211.9006, 5211.9006, 5211.9006, 5211.9006, 5211.9006] +25-08-31 07:13:50 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:13:50 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:13:50 | D | - sum error = [ 5248.9367, 5240.5687, 5244.3348, 5249.2495, 5251.7471] +25-08-31 07:13:50 | D | - best error = [ 5211.9006, 5211.9006, 5211.9006, 5211.9006, 5211.9006] +25-08-31 07:13:50 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:13:50 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:13:50 | D | - sum error = [ 5253.1842, 5253.5276, 5261.8770, 5269.2197] +25-08-31 07:13:50 | D | - best error = [ 5211.9006, 5211.9006, 5211.9006, 5211.9006] +25-08-31 07:13:50 | D | + error = 5211.9006 +25-08-31 07:13:50 | D | + scale = [min=1.1810, max=2.2049] +25-08-31 07:13:50 | D | - single_transformer_blocks.12.down_proj +25-08-31 07:13:50 | D | + w: sfp4_e2m1_all +25-08-31 07:13:50 | D | + x: sfp4_e2m1_all +25-08-31 07:13:50 | D | + y: None +25-08-31 07:13:50 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:13:50 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:13:53 | D | + x - AbsMax +25-08-31 07:13:53 | D | + x = [min=1.6641, max=13.6875] +25-08-31 07:13:53 | D | + w - AbsMax +25-08-31 07:13:53 | D | + w = [min=0.1123, max=0.9531] +25-08-31 07:13:53 | D | + finished resetting calibrator, ram usage: 15.1 +25-08-31 07:13:55 | D | + finished calculating the original outputs, ram usage: 15.1 +25-08-31 07:15:56 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:15:56 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:15:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:15:56 | D | - sum error = [ 6409.3691, 6394.4526, 6407.8380, 6392.2315, 6412.9382] +25-08-31 07:15:56 | D | - best error = [ 6409.3691, 6394.4526, 6394.4526, 6392.2315, 6392.2315] +25-08-31 07:15:56 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:15:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:15:56 | D | - sum error = [ 6406.0977, 6414.9145, 6417.4518, 6420.7828, 6434.8262] +25-08-31 07:15:56 | D | - best error = [ 6392.2315, 6392.2315, 6392.2315, 6392.2315, 6392.2315] +25-08-31 07:15:56 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:15:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:15:56 | D | - sum error = [ 6453.4078, 6469.3783, 6486.4301, 6515.5816, 6552.5812] +25-08-31 07:15:56 | D | - best error = [ 6392.2315, 6392.2315, 6392.2315, 6392.2315, 6392.2315] +25-08-31 07:15:56 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:15:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:15:56 | D | - sum error = [ 6566.3887, 6585.3015, 6624.8309, 6661.5479, 6708.9725] +25-08-31 07:15:56 | D | - best error = [ 6392.2315, 6392.2315, 6392.2315, 6392.2315, 6392.2315] +25-08-31 07:15:56 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:15:56 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:15:56 | D | - sum error = [ 6812.2146, 6766.7398, 6701.4509, 6653.9949, 6586.0884] +25-08-31 07:15:56 | D | - best error = [ 6392.2315, 6392.2315, 6392.2315, 6392.2315, 6392.2315] +25-08-31 07:15:56 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:15:56 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:15:56 | D | - sum error = [ 6554.6442, 6530.1825, 6495.8165, 6476.5207, 6458.1911] +25-08-31 07:15:56 | D | - best error = [ 6392.2315, 6392.2315, 6392.2315, 6392.2315, 6392.2315] +25-08-31 07:15:56 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:15:56 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:15:56 | D | - sum error = [ 6439.6500, 6602.9118, 6472.9845, 6500.4920, 6523.5006] +25-08-31 07:15:56 | D | - best error = [ 6392.2315, 6392.2315, 6392.2315, 6392.2315, 6392.2315] +25-08-31 07:15:56 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:15:56 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:15:56 | D | - sum error = [ 6569.0456, 6589.8734, 6625.6791, 6698.9790] +25-08-31 07:15:56 | D | - best error = [ 6392.2315, 6392.2315, 6392.2315, 6392.2315] +25-08-31 07:15:56 | D | + error = 6392.2315 +25-08-31 07:15:56 | D | + scale = [min=1.0794, max=1.4806] +25-08-31 07:16:16 | D | - Smoothing Diffusion Block single_transformer_blocks.13 +25-08-31 07:16:16 | D | - Skipping Module single_transformer_blocks.13.norm.linear +25-08-31 07:16:16 | D | - Smoothing Transformer Block single_transformer_blocks.13 +25-08-31 07:16:16 | D | - single_transformer_blocks.13.attn.qkv_proj + single_transformer_blocks.13.up_proj +25-08-31 07:16:16 | D | + w: sfp4_e2m1_all +25-08-31 07:16:16 | D | + x: sfp4_e2m1_all +25-08-31 07:16:16 | D | + y: None +25-08-31 07:16:16 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:16:16 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:16:17 | D | + x - AbsMax +25-08-31 07:16:17 | D | + x = [min=0.2021, max=24.2500] +25-08-31 07:16:17 | D | + w - AbsMax +25-08-31 07:16:17 | D | + w = [min=0.0679, max=0.8555] +25-08-31 07:16:17 | D | + finished resetting calibrator, ram usage: 15.1 +25-08-31 07:16:18 | D | + finished calculating the original outputs, ram usage: 15.1 +25-08-31 07:18:25 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:18:25 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:18:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:18:25 | D | - sum error = [47636.4875, 47275.5626, 47107.6683, 46871.9201, 46458.8702] +25-08-31 07:18:25 | D | - best error = [47636.4875, 47275.5626, 47107.6683, 46871.9201, 46458.8702] +25-08-31 07:18:25 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:18:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:18:25 | D | - sum error = [46344.0822, 46477.7913, 46292.2252, 46152.3016, 46231.4511] +25-08-31 07:18:25 | D | - best error = [46344.0822, 46344.0822, 46292.2252, 46152.3016, 46152.3016] +25-08-31 07:18:25 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:18:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:18:25 | D | - sum error = [46131.9679, 46235.5680, 45881.5811, 45832.1040, 45931.5776] +25-08-31 07:18:25 | D | - best error = [46131.9679, 46131.9679, 45881.5811, 45832.1040, 45832.1040] +25-08-31 07:18:25 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:18:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:18:25 | D | - sum error = [45894.4528, 46402.8027, 46229.7982, 46235.6773, 46311.4410] +25-08-31 07:18:25 | D | - best error = [45832.1040, 45832.1040, 45832.1040, 45832.1040, 45832.1040] +25-08-31 07:18:25 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:18:25 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:18:25 | D | - sum error = [54787.7031, 53509.2701, 52591.4621, 51493.8841, 50556.8303] +25-08-31 07:18:25 | D | - best error = [45832.1040, 45832.1040, 45832.1040, 45832.1040, 45832.1040] +25-08-31 07:18:25 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:18:25 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:18:25 | D | - sum error = [49806.7022, 49184.0963, 48722.6718, 48309.6095, 47590.4474] +25-08-31 07:18:25 | D | - best error = [45832.1040, 45832.1040, 45832.1040, 45832.1040, 45832.1040] +25-08-31 07:18:25 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:18:25 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:18:25 | D | - sum error = [47207.2374, 46988.3472, 46607.4343, 46454.6327, 46599.2110] +25-08-31 07:18:25 | D | - best error = [45832.1040, 45832.1040, 45832.1040, 45832.1040, 45832.1040] +25-08-31 07:18:25 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:18:25 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:18:25 | D | - sum error = [46459.0906, 46188.3712, 46408.6806, 46446.7351] +25-08-31 07:18:25 | D | - best error = [45832.1040, 45832.1040, 45832.1040, 45832.1040] +25-08-31 07:18:25 | D | + error = 45832.1040 +25-08-31 07:18:25 | D | + scale = [min=0.3537, max=7.9444] +25-08-31 07:18:25 | D | - single_transformer_blocks.13.attn.out_proj +25-08-31 07:18:25 | D | + w: sfp4_e2m1_all +25-08-31 07:18:25 | D | + x: sfp4_e2m1_all +25-08-31 07:18:25 | D | + y: None +25-08-31 07:18:25 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:18:25 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:18:25 | D | + x - AbsMax +25-08-31 07:18:25 | D | + x = [min=1.4219, max=10.1250] +25-08-31 07:18:25 | D | + w - AbsMax +25-08-31 07:18:25 | D | + w = [min=0.1191, max=0.3945] +25-08-31 07:18:25 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 07:18:26 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 07:19:24 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:19:24 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:19:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:19:24 | D | - sum error = [ 6646.0223, 6644.7333, 6632.1767, 6614.6692, 6616.5299] +25-08-31 07:19:24 | D | - best error = [ 6646.0223, 6644.7333, 6632.1767, 6614.6692, 6614.6692] +25-08-31 07:19:24 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:19:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:19:24 | D | - sum error = [ 6596.4754, 6599.6003, 6606.3963, 6600.1521, 6597.8381] +25-08-31 07:19:24 | D | - best error = [ 6596.4754, 6596.4754, 6596.4754, 6596.4754, 6596.4754] +25-08-31 07:19:24 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:19:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:19:24 | D | - sum error = [ 6587.2185, 6600.2194, 6601.6395, 6611.1727, 6612.4380] +25-08-31 07:19:24 | D | - best error = [ 6587.2185, 6587.2185, 6587.2185, 6587.2185, 6587.2185] +25-08-31 07:19:24 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:19:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:19:24 | D | - sum error = [ 6615.0368, 6620.7205, 6621.2476, 6648.5896, 6652.0171] +25-08-31 07:19:24 | D | - best error = [ 6587.2185, 6587.2185, 6587.2185, 6587.2185, 6587.2185] +25-08-31 07:19:24 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:19:24 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:19:24 | D | - sum error = [ 6690.7237, 6691.9408, 6673.3629, 6650.6776, 6635.2607] +25-08-31 07:19:24 | D | - best error = [ 6587.2185, 6587.2185, 6587.2185, 6587.2185, 6587.2185] +25-08-31 07:19:24 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:19:24 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:19:24 | D | - sum error = [ 6646.5621, 6624.2370, 6629.9689, 6599.8091, 6633.9572] +25-08-31 07:19:24 | D | - best error = [ 6587.2185, 6587.2185, 6587.2185, 6587.2185, 6587.2185] +25-08-31 07:19:24 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:19:24 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:19:24 | D | - sum error = [ 6613.9274, 6614.9316, 6605.1743, 6616.8956, 6613.8806] +25-08-31 07:19:24 | D | - best error = [ 6587.2185, 6587.2185, 6587.2185, 6587.2185, 6587.2185] +25-08-31 07:19:24 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:19:24 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:19:24 | D | - sum error = [ 6620.1317, 6618.9809, 6646.8432, 6662.5953] +25-08-31 07:19:24 | D | - best error = [ 6587.2185, 6587.2185, 6587.2185, 6587.2185] +25-08-31 07:19:24 | D | + error = 6587.2185 +25-08-31 07:19:24 | D | + scale = [min=1.1924, max=3.1820] +25-08-31 07:19:24 | D | - single_transformer_blocks.13.down_proj +25-08-31 07:19:24 | D | + w: sfp4_e2m1_all +25-08-31 07:19:24 | D | + x: sfp4_e2m1_all +25-08-31 07:19:24 | D | + y: None +25-08-31 07:19:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:19:24 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:19:27 | D | + x - AbsMax +25-08-31 07:19:27 | D | + x = [min=1.5078, max=25.5000] +25-08-31 07:19:27 | D | + w - AbsMax +25-08-31 07:19:27 | D | + w = [min=0.1172, max=1.0078] +25-08-31 07:19:27 | D | + finished resetting calibrator, ram usage: 15.1 +25-08-31 07:19:29 | D | + finished calculating the original outputs, ram usage: 15.1 +25-08-31 07:21:25 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:21:25 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:21:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:21:25 | D | - sum error = [ 6660.2438, 6765.7091, 6672.7125, 6663.2503, 6676.6012] +25-08-31 07:21:25 | D | - best error = [ 6660.2438, 6660.2438, 6660.2438, 6660.2438, 6660.2438] +25-08-31 07:21:25 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:21:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:21:25 | D | - sum error = [ 6674.0926, 6686.5641, 6703.3389, 6699.4283, 6727.2907] +25-08-31 07:21:25 | D | - best error = [ 6660.2438, 6660.2438, 6660.2438, 6660.2438, 6660.2438] +25-08-31 07:21:25 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:21:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:21:25 | D | - sum error = [ 6743.6136, 6779.6203, 6792.3448, 6795.1203, 6844.5447] +25-08-31 07:21:25 | D | - best error = [ 6660.2438, 6660.2438, 6660.2438, 6660.2438, 6660.2438] +25-08-31 07:21:25 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:21:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:21:25 | D | - sum error = [ 6883.4509, 6895.7117, 6933.6631, 6967.2020, 7007.5325] +25-08-31 07:21:25 | D | - best error = [ 6660.2438, 6660.2438, 6660.2438, 6660.2438, 6660.2438] +25-08-31 07:21:25 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:21:25 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:21:25 | D | - sum error = [ 7058.4531, 7007.0673, 6966.5263, 6921.3265, 6884.2423] +25-08-31 07:21:25 | D | - best error = [ 6660.2438, 6660.2438, 6660.2438, 6660.2438, 6660.2438] +25-08-31 07:21:25 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:21:25 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:21:25 | D | - sum error = [ 6843.7940, 6808.6272, 6781.8446, 6774.7856, 6769.3453] +25-08-31 07:21:25 | D | - best error = [ 6660.2438, 6660.2438, 6660.2438, 6660.2438, 6660.2438] +25-08-31 07:21:25 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:21:25 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:21:25 | D | - sum error = [ 6777.7973, 6770.7636, 6785.5585, 6811.3300, 6836.9932] +25-08-31 07:21:25 | D | - best error = [ 6660.2438, 6660.2438, 6660.2438, 6660.2438, 6660.2438] +25-08-31 07:21:25 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:21:25 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:21:25 | D | - sum error = [ 6872.1560, 6911.8900, 6963.0355, 7006.0854] +25-08-31 07:21:25 | D | - best error = [ 6660.2438, 6660.2438, 6660.2438, 6660.2438] +25-08-31 07:21:25 | D | + error = 6660.2438 +25-08-31 07:21:25 | D | + scale = [min=1.0000, max=1.0000] +25-08-31 07:21:43 | D | - Smoothing Diffusion Block single_transformer_blocks.14 +25-08-31 07:21:43 | D | - Skipping Module single_transformer_blocks.14.norm.linear +25-08-31 07:21:43 | D | - Smoothing Transformer Block single_transformer_blocks.14 +25-08-31 07:21:43 | D | - single_transformer_blocks.14.attn.qkv_proj + single_transformer_blocks.14.up_proj +25-08-31 07:21:43 | D | + w: sfp4_e2m1_all +25-08-31 07:21:43 | D | + x: sfp4_e2m1_all +25-08-31 07:21:43 | D | + y: None +25-08-31 07:21:43 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:21:43 | D | + finished parsing calibration arguments, ram usage: 15.1 +25-08-31 07:21:44 | D | + x - AbsMax +25-08-31 07:21:44 | D | + x = [min=0.1367, max=30.7500] +25-08-31 07:21:44 | D | + w - AbsMax +25-08-31 07:21:44 | D | + w = [min=0.0845, max=1.1094] +25-08-31 07:21:44 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 07:21:45 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 07:23:51 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:23:51 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:23:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:23:51 | D | - sum error = [49392.8941, 49026.2062, 48496.5094, 48310.7488, 47391.1870] +25-08-31 07:23:51 | D | - best error = [49392.8941, 49026.2062, 48496.5094, 48310.7488, 47391.1870] +25-08-31 07:23:51 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:23:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:23:51 | D | - sum error = [47149.0844, 46794.5952, 46934.0167, 46625.0460, 46464.6364] +25-08-31 07:23:51 | D | - best error = [47149.0844, 46794.5952, 46794.5952, 46625.0460, 46464.6364] +25-08-31 07:23:51 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:23:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:23:51 | D | - sum error = [46190.8777, 46389.1877, 46032.3531, 46273.3859, 46238.4401] +25-08-31 07:23:51 | D | - best error = [46190.8777, 46190.8777, 46032.3531, 46032.3531, 46032.3531] +25-08-31 07:23:51 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:23:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:23:51 | D | - sum error = [46337.1215, 46097.0094, 46129.3486, 46198.8343, 46665.5875] +25-08-31 07:23:51 | D | - best error = [46032.3531, 46032.3531, 46032.3531, 46032.3531, 46032.3531] +25-08-31 07:23:51 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:23:51 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:23:51 | D | - sum error = [55879.8139, 54305.7867, 53221.4044, 52045.2726, 50875.8212] +25-08-31 07:23:51 | D | - best error = [46032.3531, 46032.3531, 46032.3531, 46032.3531, 46032.3531] +25-08-31 07:23:51 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:23:51 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:23:51 | D | - sum error = [49903.1459, 49281.7844, 49131.4619, 47963.2286, 47786.0271] +25-08-31 07:23:51 | D | - best error = [46032.3531, 46032.3531, 46032.3531, 46032.3531, 46032.3531] +25-08-31 07:23:51 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:23:51 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:23:51 | D | - sum error = [46946.7757, 46805.6038, 46468.7105, 46366.2912, 46342.6791] +25-08-31 07:23:51 | D | - best error = [46032.3531, 46032.3531, 46032.3531, 46032.3531, 46032.3531] +25-08-31 07:23:51 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:23:51 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:23:51 | D | - sum error = [46372.9245, 46257.6437, 46277.0219, 46394.1376] +25-08-31 07:23:51 | D | - best error = [46032.3531, 46032.3531, 46032.3531, 46032.3531] +25-08-31 07:23:51 | D | + error = 46032.3531 +25-08-31 07:23:51 | D | + scale = [min=0.3030, max=7.8110] +25-08-31 07:23:51 | D | - single_transformer_blocks.14.attn.out_proj +25-08-31 07:23:51 | D | + w: sfp4_e2m1_all +25-08-31 07:23:51 | D | + x: sfp4_e2m1_all +25-08-31 07:23:51 | D | + y: None +25-08-31 07:23:51 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:23:51 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:23:52 | D | + x - AbsMax +25-08-31 07:23:52 | D | + x = [min=1.0781, max=7.9375] +25-08-31 07:23:52 | D | + w - AbsMax +25-08-31 07:23:52 | D | + w = [min=0.1240, max=0.4336] +25-08-31 07:23:52 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 07:23:53 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 07:24:50 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:24:50 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:24:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:24:50 | D | - sum error = [ 5934.2460, 5934.5537, 5940.0549, 5939.3131, 5925.6588] +25-08-31 07:24:50 | D | - best error = [ 5934.2460, 5934.2460, 5934.2460, 5934.2460, 5925.6588] +25-08-31 07:24:50 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:24:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:24:50 | D | - sum error = [ 5943.8345, 5935.5697, 5934.0957, 5945.3148, 5932.3530] +25-08-31 07:24:50 | D | - best error = [ 5925.6588, 5925.6588, 5925.6588, 5925.6588, 5925.6588] +25-08-31 07:24:50 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:24:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:24:50 | D | - sum error = [ 5932.2498, 5943.2823, 5947.4136, 5956.8102, 5958.3290] +25-08-31 07:24:50 | D | - best error = [ 5925.6588, 5925.6588, 5925.6588, 5925.6588, 5925.6588] +25-08-31 07:24:50 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:24:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:24:50 | D | - sum error = [ 5962.4965, 5983.7172, 5982.4433, 5982.7031, 5996.6781] +25-08-31 07:24:50 | D | - best error = [ 5925.6588, 5925.6588, 5925.6588, 5925.6588, 5925.6588] +25-08-31 07:24:50 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:24:50 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:24:50 | D | - sum error = [ 5992.5093, 5976.0479, 5973.1352, 5969.1383, 5954.4031] +25-08-31 07:24:50 | D | - best error = [ 5925.6588, 5925.6588, 5925.6588, 5925.6588, 5925.6588] +25-08-31 07:24:50 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:24:50 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:24:50 | D | - sum error = [ 5957.0274, 5945.7363, 5948.1893, 5952.0370, 5944.8034] +25-08-31 07:24:50 | D | - best error = [ 5925.6588, 5925.6588, 5925.6588, 5925.6588, 5925.6588] +25-08-31 07:24:50 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:24:50 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:24:50 | D | - sum error = [ 5948.4983, 5938.4998, 5935.6516, 5952.8403, 5961.8494] +25-08-31 07:24:50 | D | - best error = [ 5925.6588, 5925.6588, 5925.6588, 5925.6588, 5925.6588] +25-08-31 07:24:50 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:24:50 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:24:50 | D | - sum error = [ 5964.2710, 5975.4893, 5979.7174, 5998.9572] +25-08-31 07:24:50 | D | - best error = [ 5925.6588, 5925.6588, 5925.6588, 5925.6588] +25-08-31 07:24:50 | D | + error = 5925.6588 +25-08-31 07:24:50 | D | + scale = [min=1.0152, max=1.5133] +25-08-31 07:24:50 | D | - single_transformer_blocks.14.down_proj +25-08-31 07:24:50 | D | + w: sfp4_e2m1_all +25-08-31 07:24:50 | D | + x: sfp4_e2m1_all +25-08-31 07:24:50 | D | + y: None +25-08-31 07:24:50 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:24:50 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:24:53 | D | + x - AbsMax +25-08-31 07:24:53 | D | + x = [min=1.4297, max=17.3750] +25-08-31 07:24:53 | D | + w - AbsMax +25-08-31 07:24:53 | D | + w = [min=0.1167, max=1.0391] +25-08-31 07:24:53 | D | + finished resetting calibrator, ram usage: 15.1 +25-08-31 07:24:55 | D | + finished calculating the original outputs, ram usage: 15.1 +25-08-31 07:26:51 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:26:51 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:26:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:26:51 | D | - sum error = [ 6784.9121, 6776.3043, 6771.6562, 6772.7117, 6789.6079] +25-08-31 07:26:51 | D | - best error = [ 6784.9121, 6776.3043, 6771.6562, 6771.6562, 6771.6562] +25-08-31 07:26:51 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:26:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:26:51 | D | - sum error = [ 6789.0368, 6809.8214, 6813.1373, 6825.4588, 6830.9499] +25-08-31 07:26:51 | D | - best error = [ 6771.6562, 6771.6562, 6771.6562, 6771.6562, 6771.6562] +25-08-31 07:26:51 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:26:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:26:51 | D | - sum error = [ 6858.1694, 6887.8394, 6912.3053, 6924.3946, 6950.1016] +25-08-31 07:26:51 | D | - best error = [ 6771.6562, 6771.6562, 6771.6562, 6771.6562, 6771.6562] +25-08-31 07:26:51 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:26:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:26:51 | D | - sum error = [ 6990.0406, 7034.1311, 7056.4209, 7101.6229, 7151.6222] +25-08-31 07:26:51 | D | - best error = [ 6771.6562, 6771.6562, 6771.6562, 6771.6562, 6771.6562] +25-08-31 07:26:51 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:26:51 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:26:51 | D | - sum error = [ 7224.6445, 7165.3047, 7094.8345, 7062.5265, 7002.1734] +25-08-31 07:26:51 | D | - best error = [ 6771.6562, 6771.6562, 6771.6562, 6771.6562, 6771.6562] +25-08-31 07:26:51 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:26:51 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:26:51 | D | - sum error = [ 6969.4132, 6938.9025, 6926.3237, 6896.8335, 6889.8874] +25-08-31 07:26:51 | D | - best error = [ 6771.6562, 6771.6562, 6771.6562, 6771.6562, 6771.6562] +25-08-31 07:26:51 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:26:51 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:26:51 | D | - sum error = [ 6885.6793, 6889.2197, 6909.5325, 6923.8891, 6966.3270] +25-08-31 07:26:51 | D | - best error = [ 6771.6562, 6771.6562, 6771.6562, 6771.6562, 6771.6562] +25-08-31 07:26:51 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:26:51 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:26:51 | D | - sum error = [ 6986.1612, 7042.0554, 7081.0358, 7145.3910] +25-08-31 07:26:51 | D | - best error = [ 6771.6562, 6771.6562, 6771.6562, 6771.6562] +25-08-31 07:26:51 | D | + error = 6771.6562 +25-08-31 07:26:51 | D | + scale = [min=1.0364, max=1.3304] +25-08-31 07:27:09 | D | - Smoothing Diffusion Block single_transformer_blocks.15 +25-08-31 07:27:09 | D | - Skipping Module single_transformer_blocks.15.norm.linear +25-08-31 07:27:09 | D | - Smoothing Transformer Block single_transformer_blocks.15 +25-08-31 07:27:09 | D | - single_transformer_blocks.15.attn.qkv_proj + single_transformer_blocks.15.up_proj +25-08-31 07:27:09 | D | + w: sfp4_e2m1_all +25-08-31 07:27:09 | D | + x: sfp4_e2m1_all +25-08-31 07:27:09 | D | + y: None +25-08-31 07:27:09 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:27:09 | D | + finished parsing calibration arguments, ram usage: 15.1 +25-08-31 07:27:09 | D | + x - AbsMax +25-08-31 07:27:09 | D | + x = [min=0.2910, max=21.3750] +25-08-31 07:27:09 | D | + w - AbsMax +25-08-31 07:27:09 | D | + w = [min=0.1328, max=0.7812] +25-08-31 07:27:09 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 07:27:10 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 07:29:18 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:29:18 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:29:18 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:29:18 | D | - sum error = [46809.5990, 46686.7580, 46216.1142, 46021.4545, 45810.9479] +25-08-31 07:29:18 | D | - best error = [46809.5990, 46686.7580, 46216.1142, 46021.4545, 45810.9479] +25-08-31 07:29:18 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:29:18 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:29:18 | D | - sum error = [45587.5548, 45420.5479, 45403.3105, 45098.8348, 45202.9082] +25-08-31 07:29:18 | D | - best error = [45587.5548, 45420.5479, 45403.3105, 45098.8348, 45098.8348] +25-08-31 07:29:18 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:29:18 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:29:18 | D | - sum error = [45216.3369, 45112.0489, 45215.4367, 45184.6601, 45252.5995] +25-08-31 07:29:18 | D | - best error = [45098.8348, 45098.8348, 45098.8348, 45098.8348, 45098.8348] +25-08-31 07:29:18 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:29:18 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:29:18 | D | - sum error = [45440.1070, 45620.2341, 45480.8098, 45581.1161, 45707.9256] +25-08-31 07:29:18 | D | - best error = [45098.8348, 45098.8348, 45098.8348, 45098.8348, 45098.8348] +25-08-31 07:29:18 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:29:18 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:29:18 | D | - sum error = [55098.7733, 53260.8446, 52168.6275, 51358.1109, 50036.4411] +25-08-31 07:29:18 | D | - best error = [45098.8348, 45098.8348, 45098.8348, 45098.8348, 45098.8348] +25-08-31 07:29:18 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:29:18 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:29:18 | D | - sum error = [48977.0663, 48140.3282, 47455.4228, 46927.5254, 46752.1150] +25-08-31 07:29:18 | D | - best error = [45098.8348, 45098.8348, 45098.8348, 45098.8348, 45098.8348] +25-08-31 07:29:18 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:29:18 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:29:18 | D | - sum error = [46419.9984, 46298.9714, 45757.2573, 45563.5447, 45575.8701] +25-08-31 07:29:18 | D | - best error = [45098.8348, 45098.8348, 45098.8348, 45098.8348, 45098.8348] +25-08-31 07:29:18 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:29:18 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:29:18 | D | - sum error = [45620.1555, 45426.3599, 45678.0273, 45778.1894] +25-08-31 07:29:18 | D | - best error = [45098.8348, 45098.8348, 45098.8348, 45098.8348] +25-08-31 07:29:18 | D | + error = 45098.8348 +25-08-31 07:29:18 | D | + scale = [min=0.6103, max=3.4038] +25-08-31 07:29:18 | D | - single_transformer_blocks.15.attn.out_proj +25-08-31 07:29:18 | D | + w: sfp4_e2m1_all +25-08-31 07:29:18 | D | + x: sfp4_e2m1_all +25-08-31 07:29:18 | D | + y: None +25-08-31 07:29:18 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:29:18 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:29:18 | D | + x - AbsMax +25-08-31 07:29:18 | D | + x = [min=1.3828, max=8.6250] +25-08-31 07:29:18 | D | + w - AbsMax +25-08-31 07:29:18 | D | + w = [min=0.1206, max=0.4512] +25-08-31 07:29:18 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 07:29:19 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 07:30:16 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:30:16 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:30:16 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:30:16 | D | - sum error = [ 6305.1268, 6300.1159, 6287.6460, 6285.8151, 6277.0260] +25-08-31 07:30:16 | D | - best error = [ 6305.1268, 6300.1159, 6287.6460, 6285.8151, 6277.0260] +25-08-31 07:30:16 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:30:16 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:30:16 | D | - sum error = [ 6272.1467, 6287.3562, 6276.9521, 6259.7645, 6263.6402] +25-08-31 07:30:16 | D | - best error = [ 6272.1467, 6272.1467, 6272.1467, 6259.7645, 6259.7645] +25-08-31 07:30:16 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:30:16 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:30:16 | D | - sum error = [ 6259.4709, 6269.1719, 6273.0554, 6258.9916, 6249.9290] +25-08-31 07:30:16 | D | - best error = [ 6259.4709, 6259.4709, 6259.4709, 6258.9916, 6249.9290] +25-08-31 07:30:16 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:30:16 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:30:16 | D | - sum error = [ 6265.9087, 6281.7631, 6285.3700, 6292.5467, 6321.5650] +25-08-31 07:30:16 | D | - best error = [ 6249.9290, 6249.9290, 6249.9290, 6249.9290, 6249.9290] +25-08-31 07:30:16 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:30:16 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:30:16 | D | - sum error = [ 6419.6427, 6390.5289, 6378.1257, 6358.3702, 6349.4690] +25-08-31 07:30:16 | D | - best error = [ 6249.9290, 6249.9290, 6249.9290, 6249.9290, 6249.9290] +25-08-31 07:30:16 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:30:16 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:30:16 | D | - sum error = [ 6331.2893, 6313.1238, 6302.2288, 6298.8612, 6293.7750] +25-08-31 07:30:16 | D | - best error = [ 6249.9290, 6249.9290, 6249.9290, 6249.9290, 6249.9290] +25-08-31 07:30:16 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:30:16 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:30:16 | D | - sum error = [ 6300.4117, 6292.6627, 6289.3106, 6293.1270, 6279.5901] +25-08-31 07:30:16 | D | - best error = [ 6249.9290, 6249.9290, 6249.9290, 6249.9290, 6249.9290] +25-08-31 07:30:16 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:30:16 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:30:16 | D | - sum error = [ 6275.9111, 6282.9856, 6281.7523, 6299.9108] +25-08-31 07:30:16 | D | - best error = [ 6249.9290, 6249.9290, 6249.9290, 6249.9290] +25-08-31 07:30:16 | D | + error = 6249.9290 +25-08-31 07:30:16 | D | + scale = [min=1.2547, max=4.5189] +25-08-31 07:30:16 | D | - single_transformer_blocks.15.down_proj +25-08-31 07:30:16 | D | + w: sfp4_e2m1_all +25-08-31 07:30:16 | D | + x: sfp4_e2m1_all +25-08-31 07:30:16 | D | + y: None +25-08-31 07:30:16 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:30:16 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:30:18 | D | + x - AbsMax +25-08-31 07:30:18 | D | + x = [min=1.5312, max=18.5000] +25-08-31 07:30:18 | D | + w - AbsMax +25-08-31 07:30:18 | D | + w = [min=0.1167, max=1.2578] +25-08-31 07:30:18 | D | + finished resetting calibrator, ram usage: 15.1 +25-08-31 07:30:20 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 07:32:10 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:32:10 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:32:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:32:10 | D | - sum error = [ 6798.7519, 6783.2069, 6804.0697, 6788.9858, 6796.6501] +25-08-31 07:32:10 | D | - best error = [ 6798.7519, 6783.2069, 6783.2069, 6783.2069, 6783.2069] +25-08-31 07:32:10 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:32:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:32:10 | D | - sum error = [ 6797.2256, 6819.6821, 6832.1306, 6843.8013, 6854.4931] +25-08-31 07:32:10 | D | - best error = [ 6783.2069, 6783.2069, 6783.2069, 6783.2069, 6783.2069] +25-08-31 07:32:10 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:32:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:32:10 | D | - sum error = [ 6871.2695, 6881.1412, 6918.9181, 6941.9764, 6981.8129] +25-08-31 07:32:10 | D | - best error = [ 6783.2069, 6783.2069, 6783.2069, 6783.2069, 6783.2069] +25-08-31 07:32:10 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:32:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:32:10 | D | - sum error = [ 6993.1169, 7040.1889, 7080.9302, 7118.8870, 7170.7057] +25-08-31 07:32:10 | D | - best error = [ 6783.2069, 6783.2069, 6783.2069, 6783.2069, 6783.2069] +25-08-31 07:32:10 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:32:10 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:32:10 | D | - sum error = [ 7236.0603, 7154.2480, 7112.7905, 7074.9541, 7020.8028] +25-08-31 07:32:10 | D | - best error = [ 6783.2069, 6783.2069, 6783.2069, 6783.2069, 6783.2069] +25-08-31 07:32:10 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:32:10 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:32:10 | D | - sum error = [ 6981.6477, 6954.8087, 6935.7777, 6915.3753, 6928.0225] +25-08-31 07:32:10 | D | - best error = [ 6783.2069, 6783.2069, 6783.2069, 6783.2069, 6783.2069] +25-08-31 07:32:10 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:32:10 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:32:10 | D | - sum error = [ 6909.8388, 6920.9823, 6934.1295, 6956.3913, 6980.3054] +25-08-31 07:32:10 | D | - best error = [ 6783.2069, 6783.2069, 6783.2069, 6783.2069, 6783.2069] +25-08-31 07:32:10 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:32:10 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:32:10 | D | - sum error = [ 7007.5494, 7141.2304, 7099.5082, 7154.3131] +25-08-31 07:32:10 | D | - best error = [ 6783.2069, 6783.2069, 6783.2069, 6783.2069] +25-08-31 07:32:10 | D | + error = 6783.2069 +25-08-31 07:32:10 | D | + scale = [min=1.0215, max=1.1571] +25-08-31 07:32:27 | D | - Smoothing Diffusion Block single_transformer_blocks.16 +25-08-31 07:32:27 | D | - Skipping Module single_transformer_blocks.16.norm.linear +25-08-31 07:32:27 | D | - Smoothing Transformer Block single_transformer_blocks.16 +25-08-31 07:32:27 | D | - single_transformer_blocks.16.attn.qkv_proj + single_transformer_blocks.16.up_proj +25-08-31 07:32:27 | D | + w: sfp4_e2m1_all +25-08-31 07:32:27 | D | + x: sfp4_e2m1_all +25-08-31 07:32:27 | D | + y: None +25-08-31 07:32:27 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:32:27 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:32:28 | D | + x - AbsMax +25-08-31 07:32:28 | D | + x = [min=0.4082, max=25.6250] +25-08-31 07:32:28 | D | + w - AbsMax +25-08-31 07:32:28 | D | + w = [min=0.0938, max=1.1797] +25-08-31 07:32:28 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 07:32:29 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 07:34:36 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:34:36 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:34:36 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:34:36 | D | - sum error = [48474.4129, 48458.9023, 47080.9346, 46389.4278, 46641.4847] +25-08-31 07:34:36 | D | - best error = [48474.4129, 48458.9023, 47080.9346, 46389.4278, 46389.4278] +25-08-31 07:34:36 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:34:36 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:34:36 | D | - sum error = [46560.6405, 46648.6551, 46329.4719, 46867.4495, 45291.7921] +25-08-31 07:34:36 | D | - best error = [46389.4278, 46389.4278, 46329.4719, 46329.4719, 45291.7921] +25-08-31 07:34:36 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:34:36 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:34:36 | D | - sum error = [44976.3657, 45168.1346, 45213.0926, 45136.9557, 45318.1945] +25-08-31 07:34:36 | D | - best error = [44976.3657, 44976.3657, 44976.3657, 44976.3657, 44976.3657] +25-08-31 07:34:36 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:34:36 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:34:36 | D | - sum error = [45473.4131, 44730.4145, 44620.1303, 44773.1577, 45445.4095] +25-08-31 07:34:36 | D | - best error = [44976.3657, 44730.4145, 44620.1303, 44620.1303, 44620.1303] +25-08-31 07:34:36 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:34:36 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:34:36 | D | - sum error = [59943.0108, 59119.3267, 57173.1786, 53872.6201, 52978.8752] +25-08-31 07:34:36 | D | - best error = [44620.1303, 44620.1303, 44620.1303, 44620.1303, 44620.1303] +25-08-31 07:34:36 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:34:36 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:34:36 | D | - sum error = [51214.5554, 50028.5026, 49521.4552, 48346.5305, 48177.7828] +25-08-31 07:34:36 | D | - best error = [44620.1303, 44620.1303, 44620.1303, 44620.1303, 44620.1303] +25-08-31 07:34:36 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:34:36 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:34:36 | D | - sum error = [46982.3265, 46502.7249, 46954.5073, 45306.1734, 45001.9107] +25-08-31 07:34:36 | D | - best error = [44620.1303, 44620.1303, 44620.1303, 44620.1303, 44620.1303] +25-08-31 07:34:36 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:34:36 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:34:36 | D | - sum error = [45002.3089, 44713.0971, 44568.0969, 44823.2911] +25-08-31 07:34:36 | D | - best error = [44620.1303, 44620.1303, 44568.0969, 44568.0969] +25-08-31 07:34:36 | D | + error = 44568.0969 +25-08-31 07:34:36 | D | + scale = [min=0.5139, max=20.1873] +25-08-31 07:34:36 | D | - single_transformer_blocks.16.attn.out_proj +25-08-31 07:34:36 | D | + w: sfp4_e2m1_all +25-08-31 07:34:36 | D | + x: sfp4_e2m1_all +25-08-31 07:34:36 | D | + y: None +25-08-31 07:34:36 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:34:36 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:34:36 | D | + x - AbsMax +25-08-31 07:34:36 | D | + x = [min=0.9102, max=12.8125] +25-08-31 07:34:36 | D | + w - AbsMax +25-08-31 07:34:36 | D | + w = [min=0.1133, max=0.4023] +25-08-31 07:34:36 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 07:34:37 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 07:35:34 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:35:34 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:35:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:35:34 | D | - sum error = [ 5696.3945, 5689.4239, 5692.2730, 5686.8151, 5682.1406] +25-08-31 07:35:34 | D | - best error = [ 5696.3945, 5689.4239, 5689.4239, 5686.8151, 5682.1406] +25-08-31 07:35:34 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:35:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:35:34 | D | - sum error = [ 5684.9151, 5693.8969, 5678.1800, 5682.0975, 5681.3407] +25-08-31 07:35:34 | D | - best error = [ 5682.1406, 5682.1406, 5678.1800, 5678.1800, 5678.1800] +25-08-31 07:35:34 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:35:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:35:34 | D | - sum error = [ 5686.5455, 5710.8041, 5706.7995, 5707.7323, 5718.9263] +25-08-31 07:35:34 | D | - best error = [ 5678.1800, 5678.1800, 5678.1800, 5678.1800, 5678.1800] +25-08-31 07:35:34 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:35:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:35:34 | D | - sum error = [ 5729.1149, 5746.1220, 5767.4604, 5784.0648, 5810.9166] +25-08-31 07:35:34 | D | - best error = [ 5678.1800, 5678.1800, 5678.1800, 5678.1800, 5678.1800] +25-08-31 07:35:34 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:35:34 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:35:34 | D | - sum error = [ 5750.2856, 5733.8298, 5718.8702, 5719.1610, 5704.2663] +25-08-31 07:35:34 | D | - best error = [ 5678.1800, 5678.1800, 5678.1800, 5678.1800, 5678.1800] +25-08-31 07:35:34 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:35:34 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:35:34 | D | - sum error = [ 5716.2759, 5698.5749, 5700.6972, 5711.8078, 5708.7400] +25-08-31 07:35:34 | D | - best error = [ 5678.1800, 5678.1800, 5678.1800, 5678.1800, 5678.1800] +25-08-31 07:35:34 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:35:34 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:35:34 | D | - sum error = [ 5696.7775, 5699.8754, 5730.1119, 5722.0227, 5734.8883] +25-08-31 07:35:34 | D | - best error = [ 5678.1800, 5678.1800, 5678.1800, 5678.1800, 5678.1800] +25-08-31 07:35:34 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:35:34 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:35:34 | D | - sum error = [ 5758.3367, 5772.7051, 5772.6777, 5795.8685] +25-08-31 07:35:34 | D | - best error = [ 5678.1800, 5678.1800, 5678.1800, 5678.1800] +25-08-31 07:35:34 | D | + error = 5678.1800 +25-08-31 07:35:34 | D | + scale = [min=0.9676, max=2.4416] +25-08-31 07:35:34 | D | - single_transformer_blocks.16.down_proj +25-08-31 07:35:34 | D | + w: sfp4_e2m1_all +25-08-31 07:35:34 | D | + x: sfp4_e2m1_all +25-08-31 07:35:34 | D | + y: None +25-08-31 07:35:34 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:35:34 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:35:36 | D | + x - AbsMax +25-08-31 07:35:36 | D | + x = [min=0.9609, max=15.2500] +25-08-31 07:35:36 | D | + w - AbsMax +25-08-31 07:35:36 | D | + w = [min=0.1133, max=1.1406] +25-08-31 07:35:36 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 07:35:38 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 07:37:27 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:37:27 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:37:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:37:27 | D | - sum error = [ 6281.0598, 6288.8972, 6300.0197, 6296.0731, 6309.4101] +25-08-31 07:37:27 | D | - best error = [ 6281.0598, 6281.0598, 6281.0598, 6281.0598, 6281.0598] +25-08-31 07:37:27 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:37:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:37:27 | D | - sum error = [ 6310.0374, 6328.3710, 6338.6813, 6355.2366, 6363.0442] +25-08-31 07:37:27 | D | - best error = [ 6281.0598, 6281.0598, 6281.0598, 6281.0598, 6281.0598] +25-08-31 07:37:27 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:37:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:37:27 | D | - sum error = [ 6389.7825, 6400.9980, 6421.5830, 6449.1962, 6476.7856] +25-08-31 07:37:27 | D | - best error = [ 6281.0598, 6281.0598, 6281.0598, 6281.0598, 6281.0598] +25-08-31 07:37:27 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:37:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:37:27 | D | - sum error = [ 6510.9486, 6535.9807, 6570.5077, 6617.7446, 6651.7751] +25-08-31 07:37:27 | D | - best error = [ 6281.0598, 6281.0598, 6281.0598, 6281.0598, 6281.0598] +25-08-31 07:37:27 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:37:27 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:37:27 | D | - sum error = [ 6789.3961, 6726.7080, 6658.5836, 6614.3278, 6570.3297] +25-08-31 07:37:27 | D | - best error = [ 6281.0598, 6281.0598, 6281.0598, 6281.0598, 6281.0598] +25-08-31 07:37:27 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:37:27 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:37:27 | D | - sum error = [ 6534.4723, 6488.1689, 6470.8256, 6450.8207, 6436.3844] +25-08-31 07:37:27 | D | - best error = [ 6281.0598, 6281.0598, 6281.0598, 6281.0598, 6281.0598] +25-08-31 07:37:27 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:37:27 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:37:27 | D | - sum error = [ 6436.7302, 6436.2060, 6452.7582, 6466.4179, 6602.9836] +25-08-31 07:37:27 | D | - best error = [ 6281.0598, 6281.0598, 6281.0598, 6281.0598, 6281.0598] +25-08-31 07:37:27 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:37:27 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:37:27 | D | - sum error = [ 6507.1450, 6535.8045, 6582.3388, 6644.1201] +25-08-31 07:37:27 | D | - best error = [ 6281.0598, 6281.0598, 6281.0598, 6281.0598] +25-08-31 07:37:27 | D | + error = 6281.0598 +25-08-31 07:37:27 | D | + scale = [min=1.0000, max=1.0000] +25-08-31 07:37:45 | D | - Smoothing Diffusion Block single_transformer_blocks.17 +25-08-31 07:37:45 | D | - Skipping Module single_transformer_blocks.17.norm.linear +25-08-31 07:37:45 | D | - Smoothing Transformer Block single_transformer_blocks.17 +25-08-31 07:37:45 | D | - single_transformer_blocks.17.attn.qkv_proj + single_transformer_blocks.17.up_proj +25-08-31 07:37:45 | D | + w: sfp4_e2m1_all +25-08-31 07:37:45 | D | + x: sfp4_e2m1_all +25-08-31 07:37:45 | D | + y: None +25-08-31 07:37:45 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:37:45 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:37:45 | D | + x - AbsMax +25-08-31 07:37:45 | D | + x = [min=0.3398, max=22.3750] +25-08-31 07:37:45 | D | + w - AbsMax +25-08-31 07:37:45 | D | + w = [min=0.1367, max=0.8164] +25-08-31 07:37:45 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 07:37:47 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 07:39:54 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:39:54 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:39:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:39:54 | D | - sum error = [49067.4872, 48668.4575, 48300.2158, 48018.6390, 47804.5835] +25-08-31 07:39:54 | D | - best error = [49067.4872, 48668.4575, 48300.2158, 48018.6390, 47804.5835] +25-08-31 07:39:54 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:39:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:39:54 | D | - sum error = [47616.0161, 47539.0211, 47506.4142, 47335.3693, 47195.2920] +25-08-31 07:39:54 | D | - best error = [47616.0161, 47539.0211, 47506.4142, 47335.3693, 47195.2920] +25-08-31 07:39:54 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:39:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:39:54 | D | - sum error = [47065.5903, 47242.3539, 46960.5179, 47043.8801, 47150.4282] +25-08-31 07:39:54 | D | - best error = [47065.5903, 47065.5903, 46960.5179, 46960.5179, 46960.5179] +25-08-31 07:39:54 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:39:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:39:54 | D | - sum error = [47499.7721, 47422.4993, 47439.8489, 47613.2338, 47833.6745] +25-08-31 07:39:54 | D | - best error = [46960.5179, 46960.5179, 46960.5179, 46960.5179, 46960.5179] +25-08-31 07:39:54 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:39:54 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:39:54 | D | - sum error = [65766.4075, 62277.4160, 59549.7388, 57124.8267, 55368.8502] +25-08-31 07:39:54 | D | - best error = [46960.5179, 46960.5179, 46960.5179, 46960.5179, 46960.5179] +25-08-31 07:39:54 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:39:54 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:39:54 | D | - sum error = [53368.3400, 51912.2681, 50753.8285, 49954.7110, 49117.1647] +25-08-31 07:39:54 | D | - best error = [46960.5179, 46960.5179, 46960.5179, 46960.5179, 46960.5179] +25-08-31 07:39:54 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:39:54 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:39:54 | D | - sum error = [48717.8430, 48192.0903, 47743.7641, 47508.0874, 47407.6139] +25-08-31 07:39:54 | D | - best error = [46960.5179, 46960.5179, 46960.5179, 46960.5179, 46960.5179] +25-08-31 07:39:54 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:39:54 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:39:54 | D | - sum error = [47270.7537, 47292.9634, 47612.4058, 47517.7104] +25-08-31 07:39:54 | D | - best error = [46960.5179, 46960.5179, 46960.5179, 46960.5179] +25-08-31 07:39:54 | D | + error = 46960.5179 +25-08-31 07:39:54 | D | + scale = [min=0.5233, max=6.4544] +25-08-31 07:39:54 | D | - single_transformer_blocks.17.attn.out_proj +25-08-31 07:39:54 | D | + w: sfp4_e2m1_all +25-08-31 07:39:54 | D | + x: sfp4_e2m1_all +25-08-31 07:39:54 | D | + y: None +25-08-31 07:39:54 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:39:54 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:39:55 | D | + x - AbsMax +25-08-31 07:39:55 | D | + x = [min=1.2109, max=9.2500] +25-08-31 07:39:55 | D | + w - AbsMax +25-08-31 07:39:55 | D | + w = [min=0.1216, max=0.6016] +25-08-31 07:39:55 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 07:39:56 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 07:40:53 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:40:53 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:40:53 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:40:53 | D | - sum error = [ 6675.6393, 6649.6683, 6659.7898, 6641.5321, 6636.4451] +25-08-31 07:40:53 | D | - best error = [ 6675.6393, 6649.6683, 6649.6683, 6641.5321, 6636.4451] +25-08-31 07:40:53 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:40:53 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:40:53 | D | - sum error = [ 6624.3988, 6626.2475, 6610.2584, 6616.0538, 6613.9537] +25-08-31 07:40:53 | D | - best error = [ 6624.3988, 6624.3988, 6610.2584, 6610.2584, 6610.2584] +25-08-31 07:40:53 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:40:53 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:40:53 | D | - sum error = [ 6615.7059, 6602.5229, 6607.4921, 6606.3709, 6612.1658] +25-08-31 07:40:53 | D | - best error = [ 6610.2584, 6602.5229, 6602.5229, 6602.5229, 6602.5229] +25-08-31 07:40:53 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:40:53 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:40:53 | D | - sum error = [ 6601.3741, 6615.1033, 6606.1456, 6631.9650, 6648.7867] +25-08-31 07:40:53 | D | - best error = [ 6601.3741, 6601.3741, 6601.3741, 6601.3741, 6601.3741] +25-08-31 07:40:53 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:40:53 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:40:53 | D | - sum error = [ 6765.6855, 6742.8964, 6714.4712, 6693.6265, 6688.9527] +25-08-31 07:40:53 | D | - best error = [ 6601.3741, 6601.3741, 6601.3741, 6601.3741, 6601.3741] +25-08-31 07:40:53 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:40:53 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:40:53 | D | - sum error = [ 6683.1846, 6658.2968, 6646.7791, 6647.1386, 6640.0024] +25-08-31 07:40:53 | D | - best error = [ 6601.3741, 6601.3741, 6601.3741, 6601.3741, 6601.3741] +25-08-31 07:40:53 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:40:53 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:40:53 | D | - sum error = [ 6625.1961, 6617.8422, 6620.5229, 6619.5142, 6625.9433] +25-08-31 07:40:53 | D | - best error = [ 6601.3741, 6601.3741, 6601.3741, 6601.3741, 6601.3741] +25-08-31 07:40:53 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:40:53 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:40:53 | D | - sum error = [ 6625.0108, 6627.8422, 6627.1080, 6634.0648] +25-08-31 07:40:53 | D | - best error = [ 6601.3741, 6601.3741, 6601.3741, 6601.3741] +25-08-31 07:40:53 | D | + error = 6601.3741 +25-08-31 07:40:53 | D | + scale = [min=1.1544, max=5.3040] +25-08-31 07:40:53 | D | - single_transformer_blocks.17.down_proj +25-08-31 07:40:53 | D | + w: sfp4_e2m1_all +25-08-31 07:40:53 | D | + x: sfp4_e2m1_all +25-08-31 07:40:53 | D | + y: None +25-08-31 07:40:53 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:40:53 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:40:57 | D | + x - AbsMax +25-08-31 07:40:57 | D | + x = [min=0.8828, max=25.7500] +25-08-31 07:40:57 | D | + w - AbsMax +25-08-31 07:40:57 | D | + w = [min=0.1118, max=1.1406] +25-08-31 07:40:57 | D | + finished resetting calibrator, ram usage: 15.1 +25-08-31 07:40:59 | D | + finished calculating the original outputs, ram usage: 15.1 +25-08-31 07:43:02 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:43:02 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:43:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:43:02 | D | - sum error = [ 6363.2988, 6363.0009, 6380.7169, 6363.1791, 6371.6259] +25-08-31 07:43:02 | D | - best error = [ 6363.2988, 6363.0009, 6363.0009, 6363.0009, 6363.0009] +25-08-31 07:43:02 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:43:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:43:02 | D | - sum error = [ 6370.1648, 6381.7255, 6385.2458, 6408.5537, 6429.0463] +25-08-31 07:43:02 | D | - best error = [ 6363.0009, 6363.0009, 6363.0009, 6363.0009, 6363.0009] +25-08-31 07:43:02 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:43:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:43:02 | D | - sum error = [ 6424.9675, 6460.7009, 6495.7478, 6517.5121, 6552.0400] +25-08-31 07:43:02 | D | - best error = [ 6363.0009, 6363.0009, 6363.0009, 6363.0009, 6363.0009] +25-08-31 07:43:02 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:43:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:43:02 | D | - sum error = [ 6602.1684, 6631.7672, 6675.3130, 6731.3945, 6781.9640] +25-08-31 07:43:02 | D | - best error = [ 6363.0009, 6363.0009, 6363.0009, 6363.0009, 6363.0009] +25-08-31 07:43:02 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:43:02 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:43:02 | D | - sum error = [ 7027.4991, 6920.2118, 6842.3651, 6772.1119, 6696.3748] +25-08-31 07:43:02 | D | - best error = [ 6363.0009, 6363.0009, 6363.0009, 6363.0009, 6363.0009] +25-08-31 07:43:02 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:43:02 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:43:02 | D | - sum error = [ 6645.6822, 6607.1825, 6577.2760, 6535.8069, 6528.6525] +25-08-31 07:43:02 | D | - best error = [ 6363.0009, 6363.0009, 6363.0009, 6363.0009, 6363.0009] +25-08-31 07:43:02 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:43:02 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:43:02 | D | - sum error = [ 6627.3711, 6517.6517, 6532.8919, 6545.7449, 6566.1057] +25-08-31 07:43:02 | D | - best error = [ 6363.0009, 6363.0009, 6363.0009, 6363.0009, 6363.0009] +25-08-31 07:43:02 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:43:02 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:43:02 | D | - sum error = [ 6606.2395, 6654.5657, 6700.9423, 6771.4383] +25-08-31 07:43:02 | D | - best error = [ 6363.0009, 6363.0009, 6363.0009, 6363.0009] +25-08-31 07:43:02 | D | + error = 6363.0009 +25-08-31 07:43:02 | D | + scale = [min=0.9938, max=1.1764] +25-08-31 07:43:21 | D | - Smoothing Diffusion Block single_transformer_blocks.18 +25-08-31 07:43:21 | D | - Skipping Module single_transformer_blocks.18.norm.linear +25-08-31 07:43:21 | D | - Smoothing Transformer Block single_transformer_blocks.18 +25-08-31 07:43:21 | D | - single_transformer_blocks.18.attn.qkv_proj + single_transformer_blocks.18.up_proj +25-08-31 07:43:21 | D | + w: sfp4_e2m1_all +25-08-31 07:43:21 | D | + x: sfp4_e2m1_all +25-08-31 07:43:21 | D | + y: None +25-08-31 07:43:21 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:43:21 | D | + finished parsing calibration arguments, ram usage: 15.1 +25-08-31 07:43:21 | D | + x - AbsMax +25-08-31 07:43:21 | D | + x = [min=0.1289, max=30.5000] +25-08-31 07:43:21 | D | + w - AbsMax +25-08-31 07:43:21 | D | + w = [min=0.1055, max=1.1250] +25-08-31 07:43:21 | D | + finished resetting calibrator, ram usage: 15.1 +25-08-31 07:43:22 | D | + finished calculating the original outputs, ram usage: 15.1 +25-08-31 07:45:30 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:45:30 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:45:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:45:30 | D | - sum error = [46149.7236, 46085.7524, 45480.7479, 45062.7182, 44867.8572] +25-08-31 07:45:30 | D | - best error = [46149.7236, 46085.7524, 45480.7479, 45062.7182, 44867.8572] +25-08-31 07:45:30 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:45:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:45:30 | D | - sum error = [44713.6654, 44608.0732, 44172.1751, 44204.2141, 44240.3414] +25-08-31 07:45:30 | D | - best error = [44713.6654, 44608.0732, 44172.1751, 44172.1751, 44172.1751] +25-08-31 07:45:30 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:45:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:45:30 | D | - sum error = [44050.6777, 43983.2026, 43897.9559, 43861.9784, 43840.5109] +25-08-31 07:45:30 | D | - best error = [44050.6777, 43983.2026, 43897.9559, 43861.9784, 43840.5109] +25-08-31 07:45:30 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:45:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:45:30 | D | - sum error = [43867.0880, 44075.7602, 43989.0372, 44088.1459, 44100.4990] +25-08-31 07:45:30 | D | - best error = [43840.5109, 43840.5109, 43840.5109, 43840.5109, 43840.5109] +25-08-31 07:45:30 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:45:30 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:45:30 | D | - sum error = [53488.4810, 51976.2826, 50779.5310, 49570.0038, 48682.6813] +25-08-31 07:45:30 | D | - best error = [43840.5109, 43840.5109, 43840.5109, 43840.5109, 43840.5109] +25-08-31 07:45:30 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:45:30 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:45:30 | D | - sum error = [47712.4078, 46830.8384, 46309.5726, 45574.9152, 45140.4578] +25-08-31 07:45:30 | D | - best error = [43840.5109, 43840.5109, 43840.5109, 43840.5109, 43840.5109] +25-08-31 07:45:30 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:45:30 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:45:30 | D | - sum error = [44877.3489, 44566.3076, 44585.3515, 44210.6087, 44144.9714] +25-08-31 07:45:30 | D | - best error = [43840.5109, 43840.5109, 43840.5109, 43840.5109, 43840.5109] +25-08-31 07:45:30 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:45:30 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:45:30 | D | - sum error = [44103.3767, 44162.7680, 44060.8713, 44064.3980] +25-08-31 07:45:30 | D | - best error = [43840.5109, 43840.5109, 43840.5109, 43840.5109] +25-08-31 07:45:30 | D | + error = 43840.5109 +25-08-31 07:45:30 | D | + scale = [min=0.2383, max=10.9398] +25-08-31 07:45:30 | D | - single_transformer_blocks.18.attn.out_proj +25-08-31 07:45:30 | D | + w: sfp4_e2m1_all +25-08-31 07:45:30 | D | + x: sfp4_e2m1_all +25-08-31 07:45:30 | D | + y: None +25-08-31 07:45:30 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:45:30 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:45:30 | D | + x - AbsMax +25-08-31 07:45:30 | D | + x = [min=1.0078, max=11.1875] +25-08-31 07:45:30 | D | + w - AbsMax +25-08-31 07:45:30 | D | + w = [min=0.1123, max=0.4375] +25-08-31 07:45:30 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 07:45:31 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 07:46:28 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:46:28 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:46:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:46:28 | D | - sum error = [ 5226.0844, 5211.2475, 5199.8973, 5198.9226, 5186.3330] +25-08-31 07:46:28 | D | - best error = [ 5226.0844, 5211.2475, 5199.8973, 5198.9226, 5186.3330] +25-08-31 07:46:28 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:46:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:46:28 | D | - sum error = [ 5180.1351, 5182.6318, 5180.1156, 5178.3582, 5181.7484] +25-08-31 07:46:28 | D | - best error = [ 5180.1351, 5180.1351, 5180.1156, 5178.3582, 5178.3582] +25-08-31 07:46:28 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:46:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:46:28 | D | - sum error = [ 5183.8160, 5177.8563, 5178.8427, 5186.5833, 5185.8492] +25-08-31 07:46:28 | D | - best error = [ 5178.3582, 5177.8563, 5177.8563, 5177.8563, 5177.8563] +25-08-31 07:46:28 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:46:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:46:28 | D | - sum error = [ 5198.0450, 5197.5394, 5212.6586, 5222.2357, 5227.2008] +25-08-31 07:46:28 | D | - best error = [ 5177.8563, 5177.8563, 5177.8563, 5177.8563, 5177.8563] +25-08-31 07:46:28 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:46:28 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:46:28 | D | - sum error = [ 5306.3784, 5286.5074, 5283.9735, 5267.0555, 5242.7737] +25-08-31 07:46:28 | D | - best error = [ 5177.8563, 5177.8563, 5177.8563, 5177.8563, 5177.8563] +25-08-31 07:46:28 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:46:28 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:46:28 | D | - sum error = [ 5229.8914, 5230.4181, 5225.4441, 5215.4212, 5208.5841] +25-08-31 07:46:28 | D | - best error = [ 5177.8563, 5177.8563, 5177.8563, 5177.8563, 5177.8563] +25-08-31 07:46:28 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:46:28 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:46:28 | D | - sum error = [ 5214.4572, 5214.7702, 5202.8383, 5202.9126, 5201.2287] +25-08-31 07:46:28 | D | - best error = [ 5177.8563, 5177.8563, 5177.8563, 5177.8563, 5177.8563] +25-08-31 07:46:28 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:46:28 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:46:28 | D | - sum error = [ 5205.5772, 5221.4469, 5223.7004, 5243.2419] +25-08-31 07:46:28 | D | - best error = [ 5177.8563, 5177.8563, 5177.8563, 5177.8563] +25-08-31 07:46:28 | D | + error = 5177.8563 +25-08-31 07:46:28 | D | + scale = [min=1.0043, max=3.7740] +25-08-31 07:46:28 | D | - single_transformer_blocks.18.down_proj +25-08-31 07:46:28 | D | + w: sfp4_e2m1_all +25-08-31 07:46:28 | D | + x: sfp4_e2m1_all +25-08-31 07:46:28 | D | + y: None +25-08-31 07:46:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:46:28 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:46:30 | D | + x - AbsMax +25-08-31 07:46:30 | D | + x = [min=0.8242, max=17.0000] +25-08-31 07:46:30 | D | + w - AbsMax +25-08-31 07:46:30 | D | + w = [min=0.1113, max=1.0938] +25-08-31 07:46:30 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 07:46:32 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 07:48:19 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:48:19 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:48:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:48:19 | D | - sum error = [ 5967.5878, 5994.4314, 5974.5668, 5985.5249, 5982.8428] +25-08-31 07:48:19 | D | - best error = [ 5967.5878, 5967.5878, 5967.5878, 5967.5878, 5967.5878] +25-08-31 07:48:19 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:48:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:48:19 | D | - sum error = [ 5996.6471, 6002.8053, 6019.1810, 6043.1766, 6049.7954] +25-08-31 07:48:19 | D | - best error = [ 5967.5878, 5967.5878, 5967.5878, 5967.5878, 5967.5878] +25-08-31 07:48:19 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:48:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:48:19 | D | - sum error = [ 6075.2613, 6076.5572, 6102.7735, 6299.1615, 6159.8968] +25-08-31 07:48:19 | D | - best error = [ 5967.5878, 5967.5878, 5967.5878, 5967.5878, 5967.5878] +25-08-31 07:48:19 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:48:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:48:19 | D | - sum error = [ 6192.6902, 6213.2005, 6250.5824, 6280.6579, 6331.4017] +25-08-31 07:48:19 | D | - best error = [ 5967.5878, 5967.5878, 5967.5878, 5967.5878, 5967.5878] +25-08-31 07:48:19 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:48:19 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:48:19 | D | - sum error = [ 6465.1026, 6412.4158, 6347.9647, 6309.5382, 6264.7634] +25-08-31 07:48:19 | D | - best error = [ 5967.5878, 5967.5878, 5967.5878, 5967.5878, 5967.5878] +25-08-31 07:48:19 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:48:19 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:48:19 | D | - sum error = [ 6213.3965, 6182.9508, 6176.7364, 6157.2192, 6133.5862] +25-08-31 07:48:19 | D | - best error = [ 5967.5878, 5967.5878, 5967.5878, 5967.5878, 5967.5878] +25-08-31 07:48:19 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:48:19 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:48:19 | D | - sum error = [ 6132.5026, 6142.3829, 6139.9173, 6158.6015, 6171.3652] +25-08-31 07:48:19 | D | - best error = [ 5967.5878, 5967.5878, 5967.5878, 5967.5878, 5967.5878] +25-08-31 07:48:19 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:48:19 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:48:19 | D | - sum error = [ 6211.4391, 6230.9367, 6280.6987, 6316.2175] +25-08-31 07:48:19 | D | - best error = [ 5967.5878, 5967.5878, 5967.5878, 5967.5878] +25-08-31 07:48:19 | D | + error = 5967.5878 +25-08-31 07:48:19 | D | + scale = [min=1.0000, max=1.0000] +25-08-31 07:48:37 | D | - Smoothing Diffusion Block single_transformer_blocks.19 +25-08-31 07:48:37 | D | - Skipping Module single_transformer_blocks.19.norm.linear +25-08-31 07:48:37 | D | - Smoothing Transformer Block single_transformer_blocks.19 +25-08-31 07:48:37 | D | - single_transformer_blocks.19.attn.qkv_proj + single_transformer_blocks.19.up_proj +25-08-31 07:48:37 | D | + w: sfp4_e2m1_all +25-08-31 07:48:37 | D | + x: sfp4_e2m1_all +25-08-31 07:48:37 | D | + y: None +25-08-31 07:48:37 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:48:37 | D | + finished parsing calibration arguments, ram usage: 15.0 +25-08-31 07:48:37 | D | + x - AbsMax +25-08-31 07:48:37 | D | + x = [min=0.2217, max=17.3750] +25-08-31 07:48:37 | D | + w - AbsMax +25-08-31 07:48:37 | D | + w = [min=0.0679, max=0.8672] +25-08-31 07:48:37 | D | + finished resetting calibrator, ram usage: 15.0 +25-08-31 07:48:38 | D | + finished calculating the original outputs, ram usage: 15.0 +25-08-31 07:50:45 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:50:45 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:50:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:50:45 | D | - sum error = [45272.2834, 44664.2543, 44122.1403, 43769.0243, 43516.6896] +25-08-31 07:50:45 | D | - best error = [45272.2834, 44664.2543, 44122.1403, 43769.0243, 43516.6896] +25-08-31 07:50:45 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:50:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:50:45 | D | - sum error = [43067.3697, 43009.2202, 42900.8546, 42579.2678, 42387.9379] +25-08-31 07:50:45 | D | - best error = [43067.3697, 43009.2202, 42900.8546, 42579.2678, 42387.9379] +25-08-31 07:50:45 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:50:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:50:45 | D | - sum error = [42316.5050, 41920.5206, 42448.4945, 41881.7046, 41933.3582] +25-08-31 07:50:45 | D | - best error = [42316.5050, 41920.5206, 41920.5206, 41881.7046, 41881.7046] +25-08-31 07:50:45 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:50:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:50:45 | D | - sum error = [41871.9587, 41857.8669, 41943.7826, 41962.4091, 42131.1248] +25-08-31 07:50:45 | D | - best error = [41871.9587, 41857.8669, 41857.8669, 41857.8669, 41857.8669] +25-08-31 07:50:45 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:50:45 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:50:45 | D | - sum error = [56014.1674, 54225.9734, 51885.7428, 50456.3368, 49102.3343] +25-08-31 07:50:45 | D | - best error = [41857.8669, 41857.8669, 41857.8669, 41857.8669, 41857.8669] +25-08-31 07:50:45 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:50:45 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:50:45 | D | - sum error = [47774.6202, 46524.0796, 45347.2659, 44661.0545, 44143.0692] +25-08-31 07:50:45 | D | - best error = [41857.8669, 41857.8669, 41857.8669, 41857.8669, 41857.8669] +25-08-31 07:50:45 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:50:45 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:50:45 | D | - sum error = [43665.1990, 42887.7351, 42585.7170, 42452.0254, 42578.6365] +25-08-31 07:50:45 | D | - best error = [41857.8669, 41857.8669, 41857.8669, 41857.8669, 41857.8669] +25-08-31 07:50:45 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:50:45 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:50:45 | D | - sum error = [42093.9353, 42090.2375, 42014.7875, 42152.3272] +25-08-31 07:50:45 | D | - best error = [41857.8669, 41857.8669, 41857.8669, 41857.8669] +25-08-31 07:50:45 | D | + error = 41857.8669 +25-08-31 07:50:45 | D | + scale = [min=0.2996, max=9.8161] +25-08-31 07:50:45 | D | - single_transformer_blocks.19.attn.out_proj +25-08-31 07:50:45 | D | + w: sfp4_e2m1_all +25-08-31 07:50:45 | D | + x: sfp4_e2m1_all +25-08-31 07:50:45 | D | + y: None +25-08-31 07:50:45 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:50:45 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 07:50:46 | D | + x - AbsMax +25-08-31 07:50:46 | D | + x = [min=1.0391, max=7.5000] +25-08-31 07:50:46 | D | + w - AbsMax +25-08-31 07:50:46 | D | + w = [min=0.1196, max=0.4121] +25-08-31 07:50:46 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 07:50:46 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 07:51:46 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:51:46 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:51:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:51:46 | D | - sum error = [ 4516.2166, 4514.2193, 4509.8512, 4519.3334, 4508.9589] +25-08-31 07:51:46 | D | - best error = [ 4516.2166, 4514.2193, 4509.8512, 4509.8512, 4508.9589] +25-08-31 07:51:46 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:51:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:51:46 | D | - sum error = [ 4521.8582, 4516.6465, 4517.9093, 4523.3611, 4522.3326] +25-08-31 07:51:46 | D | - best error = [ 4508.9589, 4508.9589, 4508.9589, 4508.9589, 4508.9589] +25-08-31 07:51:46 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:51:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:51:46 | D | - sum error = [ 4516.1247, 4516.0040, 4509.1970, 4512.5381, 4514.1008] +25-08-31 07:51:46 | D | - best error = [ 4508.9589, 4508.9589, 4508.9589, 4508.9589, 4508.9589] +25-08-31 07:51:46 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:51:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:51:46 | D | - sum error = [ 4519.0461, 4508.7519, 4518.1213, 4518.9702, 4526.7092] +25-08-31 07:51:46 | D | - best error = [ 4508.9589, 4508.7519, 4508.7519, 4508.7519, 4508.7519] +25-08-31 07:51:46 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:51:46 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:51:46 | D | - sum error = [ 4594.2721, 4593.1523, 4576.8900, 4579.8738, 4559.4329] +25-08-31 07:51:46 | D | - best error = [ 4508.7519, 4508.7519, 4508.7519, 4508.7519, 4508.7519] +25-08-31 07:51:46 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:51:46 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:51:46 | D | - sum error = [ 4561.4215, 4546.8745, 4542.7483, 4543.7953, 4543.9316] +25-08-31 07:51:46 | D | - best error = [ 4508.7519, 4508.7519, 4508.7519, 4508.7519, 4508.7519] +25-08-31 07:51:46 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:51:46 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:51:46 | D | - sum error = [ 4546.0706, 4532.1977, 4519.8040, 4526.7317, 4525.3104] +25-08-31 07:51:46 | D | - best error = [ 4508.7519, 4508.7519, 4508.7519, 4508.7519, 4508.7519] +25-08-31 07:51:46 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:51:46 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:51:46 | D | - sum error = [ 4522.0046, 4520.0735, 4512.4809, 4528.4423] +25-08-31 07:51:46 | D | - best error = [ 4508.7519, 4508.7519, 4508.7519, 4508.7519] +25-08-31 07:51:46 | D | + error = 4508.7519 +25-08-31 07:51:46 | D | + scale = [min=1.0311, max=5.0124] +25-08-31 07:51:46 | D | - single_transformer_blocks.19.down_proj +25-08-31 07:51:46 | D | + w: sfp4_e2m1_all +25-08-31 07:51:46 | D | + x: sfp4_e2m1_all +25-08-31 07:51:46 | D | + y: None +25-08-31 07:51:46 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:51:46 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 07:51:48 | D | + x - AbsMax +25-08-31 07:51:48 | D | + x = [min=0.9258, max=14.2500] +25-08-31 07:51:48 | D | + w - AbsMax +25-08-31 07:51:48 | D | + w = [min=0.1079, max=1.4297] +25-08-31 07:51:48 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 07:51:50 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 07:53:50 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:53:50 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:53:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:53:50 | D | - sum error = [ 5630.3325, 5633.5708, 5650.1485, 5652.4934, 5673.8842] +25-08-31 07:53:50 | D | - best error = [ 5630.3325, 5630.3325, 5630.3325, 5630.3325, 5630.3325] +25-08-31 07:53:50 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:53:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:53:50 | D | - sum error = [ 5679.5813, 5681.5051, 5700.1484, 5712.7291, 5723.4671] +25-08-31 07:53:50 | D | - best error = [ 5630.3325, 5630.3325, 5630.3325, 5630.3325, 5630.3325] +25-08-31 07:53:50 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:53:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:53:50 | D | - sum error = [ 5758.3897, 5760.4500, 5803.9087, 5822.3215, 5852.9586] +25-08-31 07:53:50 | D | - best error = [ 5630.3325, 5630.3325, 5630.3325, 5630.3325, 5630.3325] +25-08-31 07:53:50 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:53:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:53:50 | D | - sum error = [ 5887.5740, 5922.0922, 5958.7985, 6011.3998, 6050.8961] +25-08-31 07:53:50 | D | - best error = [ 5630.3325, 5630.3325, 5630.3325, 5630.3325, 5630.3325] +25-08-31 07:53:50 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:53:50 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:53:50 | D | - sum error = [ 6035.6398, 5977.7474, 5952.0534, 5901.9970, 5865.6437] +25-08-31 07:53:50 | D | - best error = [ 5630.3325, 5630.3325, 5630.3325, 5630.3325, 5630.3325] +25-08-31 07:53:50 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:53:50 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:53:50 | D | - sum error = [ 5835.8358, 5808.2366, 5781.5313, 5784.4827, 5785.2430] +25-08-31 07:53:50 | D | - best error = [ 5630.3325, 5630.3325, 5630.3325, 5630.3325, 5630.3325] +25-08-31 07:53:50 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:53:50 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:53:50 | D | - sum error = [ 5789.5426, 5798.3025, 5806.8776, 5830.6830, 5859.9167] +25-08-31 07:53:50 | D | - best error = [ 5630.3325, 5630.3325, 5630.3325, 5630.3325, 5630.3325] +25-08-31 07:53:50 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:53:50 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:53:50 | D | - sum error = [ 6143.8281, 5936.2894, 5982.7428, 6028.2561] +25-08-31 07:53:50 | D | - best error = [ 5630.3325, 5630.3325, 5630.3325, 5630.3325] +25-08-31 07:53:50 | D | + error = 5630.3325 +25-08-31 07:53:50 | D | + scale = [min=1.0000, max=1.0000] +25-08-31 07:54:09 | D | - Smoothing Diffusion Block single_transformer_blocks.20 +25-08-31 07:54:09 | D | - Skipping Module single_transformer_blocks.20.norm.linear +25-08-31 07:54:09 | D | - Smoothing Transformer Block single_transformer_blocks.20 +25-08-31 07:54:09 | D | - single_transformer_blocks.20.attn.qkv_proj + single_transformer_blocks.20.up_proj +25-08-31 07:54:09 | D | + w: sfp4_e2m1_all +25-08-31 07:54:09 | D | + x: sfp4_e2m1_all +25-08-31 07:54:09 | D | + y: None +25-08-31 07:54:09 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:54:09 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 07:54:09 | D | + x - AbsMax +25-08-31 07:54:09 | D | + x = [min=0.1533, max=19.5000] +25-08-31 07:54:09 | D | + w - AbsMax +25-08-31 07:54:09 | D | + w = [min=0.0947, max=0.8789] +25-08-31 07:54:09 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 07:54:10 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 07:56:19 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:56:19 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:56:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:56:19 | D | - sum error = [47095.6732, 46578.3606, 46727.7939, 46590.8269, 46214.2212] +25-08-31 07:56:19 | D | - best error = [47095.6732, 46578.3606, 46578.3606, 46578.3606, 46214.2212] +25-08-31 07:56:19 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:56:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:56:19 | D | - sum error = [45528.0696, 45578.0944, 45068.4102, 45169.7513, 45038.9854] +25-08-31 07:56:19 | D | - best error = [45528.0696, 45528.0696, 45068.4102, 45068.4102, 45038.9854] +25-08-31 07:56:19 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:56:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:56:19 | D | - sum error = [44716.6739, 44668.9283, 44762.8932, 44804.8269, 45054.5184] +25-08-31 07:56:19 | D | - best error = [44716.6739, 44668.9283, 44668.9283, 44668.9283, 44668.9283] +25-08-31 07:56:19 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:56:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:56:19 | D | - sum error = [44930.8817, 45004.3113, 45017.5034, 45305.2792, 45632.6199] +25-08-31 07:56:19 | D | - best error = [44668.9283, 44668.9283, 44668.9283, 44668.9283, 44668.9283] +25-08-31 07:56:19 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:56:19 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:56:19 | D | - sum error = [54805.4771, 53279.6492, 51776.8356, 50507.0786, 49358.8071] +25-08-31 07:56:19 | D | - best error = [44668.9283, 44668.9283, 44668.9283, 44668.9283, 44668.9283] +25-08-31 07:56:19 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:56:19 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:56:19 | D | - sum error = [48572.7693, 47835.0713, 47178.3571, 46892.3479, 46297.1978] +25-08-31 07:56:19 | D | - best error = [44668.9283, 44668.9283, 44668.9283, 44668.9283, 44668.9283] +25-08-31 07:56:19 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:56:19 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:56:19 | D | - sum error = [45980.4035, 45491.1740, 45194.8655, 45312.9576, 44950.7008] +25-08-31 07:56:19 | D | - best error = [44668.9283, 44668.9283, 44668.9283, 44668.9283, 44668.9283] +25-08-31 07:56:19 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:56:19 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:56:19 | D | - sum error = [44974.5286, 44918.9606, 45223.6936, 45235.7268] +25-08-31 07:56:19 | D | - best error = [44668.9283, 44668.9283, 44668.9283, 44668.9283] +25-08-31 07:56:19 | D | + error = 44668.9283 +25-08-31 07:56:19 | D | + scale = [min=0.3565, max=5.1229] +25-08-31 07:56:19 | D | - single_transformer_blocks.20.attn.out_proj +25-08-31 07:56:19 | D | + w: sfp4_e2m1_all +25-08-31 07:56:19 | D | + x: sfp4_e2m1_all +25-08-31 07:56:19 | D | + y: None +25-08-31 07:56:19 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:56:19 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 07:56:20 | D | + x - AbsMax +25-08-31 07:56:20 | D | + x = [min=0.8398, max=6.1562] +25-08-31 07:56:20 | D | + w - AbsMax +25-08-31 07:56:20 | D | + w = [min=0.1055, max=0.5586] +25-08-31 07:56:20 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 07:56:20 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 07:57:20 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:57:20 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:57:20 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:57:20 | D | - sum error = [ 4836.5806, 4821.6553, 4828.8672, 4829.2229, 4815.6794] +25-08-31 07:57:20 | D | - best error = [ 4836.5806, 4821.6553, 4821.6553, 4821.6553, 4815.6794] +25-08-31 07:57:20 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:57:20 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:57:20 | D | - sum error = [ 4811.6482, 4807.9314, 4822.5719, 4816.3457, 4824.2291] +25-08-31 07:57:20 | D | - best error = [ 4811.6482, 4807.9314, 4807.9314, 4807.9314, 4807.9314] +25-08-31 07:57:20 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:57:20 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:57:20 | D | - sum error = [ 4803.8366, 4807.7735, 4804.5635, 4815.1980, 4814.7139] +25-08-31 07:57:20 | D | - best error = [ 4803.8366, 4803.8366, 4803.8366, 4803.8366, 4803.8366] +25-08-31 07:57:20 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:57:20 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:57:20 | D | - sum error = [ 4812.7593, 4812.8038, 4828.9378, 4838.6432, 4829.5853] +25-08-31 07:57:20 | D | - best error = [ 4803.8366, 4803.8366, 4803.8366, 4803.8366, 4803.8366] +25-08-31 07:57:20 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:57:20 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:57:20 | D | - sum error = [ 4920.0894, 4907.9872, 4890.2407, 4874.2995, 4862.3289] +25-08-31 07:57:20 | D | - best error = [ 4803.8366, 4803.8366, 4803.8366, 4803.8366, 4803.8366] +25-08-31 07:57:20 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:57:20 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:57:20 | D | - sum error = [ 4849.8419, 4845.9331, 4843.0924, 4834.6293, 4840.0093] +25-08-31 07:57:20 | D | - best error = [ 4803.8366, 4803.8366, 4803.8366, 4803.8366, 4803.8366] +25-08-31 07:57:20 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:57:20 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:57:20 | D | - sum error = [ 4835.5384, 4829.4449, 4835.7275, 4827.7142, 4826.8996] +25-08-31 07:57:20 | D | - best error = [ 4803.8366, 4803.8366, 4803.8366, 4803.8366, 4803.8366] +25-08-31 07:57:20 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:57:20 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:57:20 | D | - sum error = [ 4827.1967, 4825.6048, 4830.6815, 4836.2022] +25-08-31 07:57:20 | D | - best error = [ 4803.8366, 4803.8366, 4803.8366, 4803.8366] +25-08-31 07:57:20 | D | + error = 4803.8366 +25-08-31 07:57:20 | D | + scale = [min=0.9164, max=2.4812] +25-08-31 07:57:20 | D | - single_transformer_blocks.20.down_proj +25-08-31 07:57:20 | D | + w: sfp4_e2m1_all +25-08-31 07:57:20 | D | + x: sfp4_e2m1_all +25-08-31 07:57:20 | D | + y: None +25-08-31 07:57:20 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:57:20 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 07:57:22 | D | + x - AbsMax +25-08-31 07:57:22 | D | + x = [min=0.6953, max=11.7500] +25-08-31 07:57:22 | D | + w - AbsMax +25-08-31 07:57:22 | D | + w = [min=0.1113, max=1.1484] +25-08-31 07:57:22 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 07:57:24 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 07:59:19 | D | - x / w range = AbsMax / AbsMax +25-08-31 07:59:19 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 07:59:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:59:19 | D | - sum error = [ 5744.2651, 5753.1866, 5751.7980, 5756.8055, 5759.5854] +25-08-31 07:59:19 | D | - best error = [ 5744.2651, 5744.2651, 5744.2651, 5744.2651, 5744.2651] +25-08-31 07:59:19 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 07:59:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:59:19 | D | - sum error = [ 5769.9702, 5832.5919, 5790.2121, 5792.6574, 5811.1476] +25-08-31 07:59:19 | D | - best error = [ 5744.2651, 5744.2651, 5744.2651, 5744.2651, 5744.2651] +25-08-31 07:59:19 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 07:59:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:59:19 | D | - sum error = [ 5827.1128, 5838.5056, 6070.2359, 5878.4876, 5907.4800] +25-08-31 07:59:19 | D | - best error = [ 5744.2651, 5744.2651, 5744.2651, 5744.2651, 5744.2651] +25-08-31 07:59:19 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:59:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 07:59:19 | D | - sum error = [ 5922.8004, 6094.1178, 5998.2931, 6021.0179, 6063.6873] +25-08-31 07:59:19 | D | - best error = [ 5744.2651, 5744.2651, 5744.2651, 5744.2651, 5744.2651] +25-08-31 07:59:19 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 07:59:19 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 07:59:19 | D | - sum error = [ 6234.7131, 6167.2506, 6116.8888, 6071.4104, 6026.8725] +25-08-31 07:59:19 | D | - best error = [ 5744.2651, 5744.2651, 5744.2651, 5744.2651, 5744.2651] +25-08-31 07:59:19 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 07:59:19 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 07:59:19 | D | - sum error = [ 5979.7304, 5939.5286, 5914.8417, 5888.3043, 5883.0482] +25-08-31 07:59:19 | D | - best error = [ 5744.2651, 5744.2651, 5744.2651, 5744.2651, 5744.2651] +25-08-31 07:59:19 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 07:59:19 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 07:59:19 | D | - sum error = [ 5875.1609, 5883.1685, 5885.7632, 5881.0159, 5918.6032] +25-08-31 07:59:19 | D | - best error = [ 5744.2651, 5744.2651, 5744.2651, 5744.2651, 5744.2651] +25-08-31 07:59:19 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 07:59:19 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 07:59:19 | D | - sum error = [ 5936.8645, 5967.5100, 6007.2097, 6056.7173] +25-08-31 07:59:19 | D | - best error = [ 5744.2651, 5744.2651, 5744.2651, 5744.2651] +25-08-31 07:59:19 | D | + error = 5744.2651 +25-08-31 07:59:19 | D | + scale = [min=1.0000, max=1.0000] +25-08-31 07:59:38 | D | - Smoothing Diffusion Block single_transformer_blocks.21 +25-08-31 07:59:38 | D | - Skipping Module single_transformer_blocks.21.norm.linear +25-08-31 07:59:38 | D | - Smoothing Transformer Block single_transformer_blocks.21 +25-08-31 07:59:38 | D | - single_transformer_blocks.21.attn.qkv_proj + single_transformer_blocks.21.up_proj +25-08-31 07:59:38 | D | + w: sfp4_e2m1_all +25-08-31 07:59:38 | D | + x: sfp4_e2m1_all +25-08-31 07:59:38 | D | + y: None +25-08-31 07:59:38 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 07:59:38 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 07:59:38 | D | + x - AbsMax +25-08-31 07:59:38 | D | + x = [min=0.1611, max=23.1250] +25-08-31 07:59:38 | D | + w - AbsMax +25-08-31 07:59:38 | D | + w = [min=0.0654, max=0.9141] +25-08-31 07:59:38 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 07:59:40 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:01:48 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:01:48 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:01:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:01:48 | D | - sum error = [48683.3228, 48178.1414, 48058.3938, 47348.9879, 47089.5494] +25-08-31 08:01:48 | D | - best error = [48683.3228, 48178.1414, 48058.3938, 47348.9879, 47089.5494] +25-08-31 08:01:48 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:01:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:01:48 | D | - sum error = [46581.1086, 46482.5665, 45863.3309, 45636.2144, 45511.5451] +25-08-31 08:01:48 | D | - best error = [46581.1086, 46482.5665, 45863.3309, 45636.2144, 45511.5451] +25-08-31 08:01:48 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:01:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:01:48 | D | - sum error = [45446.7727, 45378.3638, 45285.1333, 45551.6825, 44952.0278] +25-08-31 08:01:48 | D | - best error = [45446.7727, 45378.3638, 45285.1333, 45285.1333, 44952.0278] +25-08-31 08:01:48 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:01:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:01:48 | D | - sum error = [45012.9581, 45161.4349, 45199.3147, 45253.7945, 45664.1205] +25-08-31 08:01:48 | D | - best error = [44952.0278, 44952.0278, 44952.0278, 44952.0278, 44952.0278] +25-08-31 08:01:48 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:01:48 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:01:48 | D | - sum error = [55317.9771, 53698.2836, 52810.6147, 51428.6445, 50075.4605] +25-08-31 08:01:48 | D | - best error = [44952.0278, 44952.0278, 44952.0278, 44952.0278, 44952.0278] +25-08-31 08:01:48 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:01:48 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:01:48 | D | - sum error = [48941.8345, 47993.7889, 47323.6293, 46922.0890, 46487.4596] +25-08-31 08:01:48 | D | - best error = [44952.0278, 44952.0278, 44952.0278, 44952.0278, 44952.0278] +25-08-31 08:01:48 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:01:48 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:01:48 | D | - sum error = [46059.5093, 45724.9936, 45828.2694, 45428.3647, 45419.2178] +25-08-31 08:01:48 | D | - best error = [44952.0278, 44952.0278, 44952.0278, 44952.0278, 44952.0278] +25-08-31 08:01:48 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:01:48 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:01:48 | D | - sum error = [45228.0493, 45286.4335, 45195.9701, 45603.3795] +25-08-31 08:01:48 | D | - best error = [44952.0278, 44952.0278, 44952.0278, 44952.0278] +25-08-31 08:01:48 | D | + error = 44952.0278 +25-08-31 08:01:48 | D | + scale = [min=0.2786, max=9.0127] +25-08-31 08:01:48 | D | - single_transformer_blocks.21.attn.out_proj +25-08-31 08:01:48 | D | + w: sfp4_e2m1_all +25-08-31 08:01:48 | D | + x: sfp4_e2m1_all +25-08-31 08:01:48 | D | + y: None +25-08-31 08:01:48 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:01:48 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:01:49 | D | + x - AbsMax +25-08-31 08:01:49 | D | + x = [min=0.9492, max=6.5000] +25-08-31 08:01:49 | D | + w - AbsMax +25-08-31 08:01:49 | D | + w = [min=0.1118, max=0.4004] +25-08-31 08:01:49 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:01:50 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:02:51 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:02:51 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:02:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:02:51 | D | - sum error = [ 4503.7795, 4500.0098, 4492.3798, 4478.6565, 4470.0740] +25-08-31 08:02:51 | D | - best error = [ 4503.7795, 4500.0098, 4492.3798, 4478.6565, 4470.0740] +25-08-31 08:02:51 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:02:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:02:51 | D | - sum error = [ 4475.5695, 4484.4846, 4459.5693, 4467.2331, 4455.3249] +25-08-31 08:02:51 | D | - best error = [ 4470.0740, 4470.0740, 4459.5693, 4459.5693, 4455.3249] +25-08-31 08:02:51 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:02:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:02:51 | D | - sum error = [ 4451.2261, 4441.4639, 4438.3630, 4455.3110, 4447.2798] +25-08-31 08:02:51 | D | - best error = [ 4451.2261, 4441.4639, 4438.3630, 4438.3630, 4438.3630] +25-08-31 08:02:51 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:02:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:02:51 | D | - sum error = [ 4434.1579, 4446.6174, 4452.0198, 4443.4523, 4443.1464] +25-08-31 08:02:51 | D | - best error = [ 4434.1579, 4434.1579, 4434.1579, 4434.1579, 4434.1579] +25-08-31 08:02:51 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:02:51 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:02:51 | D | - sum error = [ 4573.8561, 4555.9416, 4559.4903, 4533.5200, 4526.9175] +25-08-31 08:02:51 | D | - best error = [ 4434.1579, 4434.1579, 4434.1579, 4434.1579, 4434.1579] +25-08-31 08:02:51 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:02:51 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:02:51 | D | - sum error = [ 4512.7227, 4506.3595, 4500.3284, 4491.6485, 4477.5750] +25-08-31 08:02:51 | D | - best error = [ 4434.1579, 4434.1579, 4434.1579, 4434.1579, 4434.1579] +25-08-31 08:02:51 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:02:51 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:02:51 | D | - sum error = [ 4475.2457, 4471.2061, 4463.1953, 4461.1297, 4451.6287] +25-08-31 08:02:51 | D | - best error = [ 4434.1579, 4434.1579, 4434.1579, 4434.1579, 4434.1579] +25-08-31 08:02:51 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:02:51 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:02:51 | D | - sum error = [ 4460.7739, 4451.8387, 4447.6787, 4444.3096] +25-08-31 08:02:51 | D | - best error = [ 4434.1579, 4434.1579, 4434.1579, 4434.1579] +25-08-31 08:02:51 | D | + error = 4434.1579 +25-08-31 08:02:51 | D | + scale = [min=0.9617, max=4.0708] +25-08-31 08:02:51 | D | - single_transformer_blocks.21.down_proj +25-08-31 08:02:51 | D | + w: sfp4_e2m1_all +25-08-31 08:02:51 | D | + x: sfp4_e2m1_all +25-08-31 08:02:51 | D | + y: None +25-08-31 08:02:51 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:02:51 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:02:54 | D | + x - AbsMax +25-08-31 08:02:54 | D | + x = [min=0.7812, max=15.8125] +25-08-31 08:02:54 | D | + w - AbsMax +25-08-31 08:02:54 | D | + w = [min=0.1074, max=1.4141] +25-08-31 08:02:54 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:02:56 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:04:55 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:04:55 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:04:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:04:55 | D | - sum error = [ 5451.2997, 5461.1327, 5465.8529, 5469.2482, 5477.5807] +25-08-31 08:04:55 | D | - best error = [ 5451.2997, 5451.2997, 5451.2997, 5451.2997, 5451.2997] +25-08-31 08:04:55 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:04:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:04:55 | D | - sum error = [ 5487.1998, 5493.9305, 5486.2455, 5511.4363, 5518.6029] +25-08-31 08:04:55 | D | - best error = [ 5451.2997, 5451.2997, 5451.2997, 5451.2997, 5451.2997] +25-08-31 08:04:55 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:04:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:04:55 | D | - sum error = [ 5528.7145, 5546.7982, 5560.2996, 5572.3044, 5602.9289] +25-08-31 08:04:55 | D | - best error = [ 5451.2997, 5451.2997, 5451.2997, 5451.2997, 5451.2997] +25-08-31 08:04:55 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:04:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:04:55 | D | - sum error = [ 5652.3117, 5642.5399, 5664.4470, 5700.7252, 5729.7251] +25-08-31 08:04:55 | D | - best error = [ 5451.2997, 5451.2997, 5451.2997, 5451.2997, 5451.2997] +25-08-31 08:04:55 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:04:55 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:04:55 | D | - sum error = [ 5923.8960, 5872.4050, 5807.8385, 5757.8223, 5718.0584] +25-08-31 08:04:55 | D | - best error = [ 5451.2997, 5451.2997, 5451.2997, 5451.2997, 5451.2997] +25-08-31 08:04:55 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:04:55 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:04:55 | D | - sum error = [ 5680.1551, 5635.0177, 5614.5100, 5595.4446, 5598.7020] +25-08-31 08:04:55 | D | - best error = [ 5451.2997, 5451.2997, 5451.2997, 5451.2997, 5451.2997] +25-08-31 08:04:55 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:04:55 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:04:55 | D | - sum error = [ 5588.2584, 5588.3456, 5581.6994, 5603.8635, 5616.0411] +25-08-31 08:04:55 | D | - best error = [ 5451.2997, 5451.2997, 5451.2997, 5451.2997, 5451.2997] +25-08-31 08:04:55 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:04:55 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:04:55 | D | - sum error = [ 5634.9385, 5660.4655, 5682.5880, 5729.8579] +25-08-31 08:04:55 | D | - best error = [ 5451.2997, 5451.2997, 5451.2997, 5451.2997] +25-08-31 08:04:55 | D | + error = 5451.2997 +25-08-31 08:04:55 | D | + scale = [min=1.0000, max=1.0000] +25-08-31 08:05:14 | D | - Smoothing Diffusion Block single_transformer_blocks.22 +25-08-31 08:05:14 | D | - Skipping Module single_transformer_blocks.22.norm.linear +25-08-31 08:05:14 | D | - Smoothing Transformer Block single_transformer_blocks.22 +25-08-31 08:05:14 | D | - single_transformer_blocks.22.attn.qkv_proj + single_transformer_blocks.22.up_proj +25-08-31 08:05:14 | D | + w: sfp4_e2m1_all +25-08-31 08:05:14 | D | + x: sfp4_e2m1_all +25-08-31 08:05:14 | D | + y: None +25-08-31 08:05:14 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:05:14 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:05:14 | D | + x - AbsMax +25-08-31 08:05:14 | D | + x = [min=0.1650, max=16.0000] +25-08-31 08:05:14 | D | + w - AbsMax +25-08-31 08:05:14 | D | + w = [min=0.0796, max=0.7031] +25-08-31 08:05:14 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:05:16 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:07:23 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:07:23 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:07:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:07:23 | D | - sum error = [52360.6374, 51971.5910, 51684.9236, 51553.7033, 51239.4123] +25-08-31 08:07:23 | D | - best error = [52360.6374, 51971.5910, 51684.9236, 51553.7033, 51239.4123] +25-08-31 08:07:23 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:07:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:07:23 | D | - sum error = [50819.7505, 50587.9973, 50518.1548, 50380.8624, 50330.0238] +25-08-31 08:07:23 | D | - best error = [50819.7505, 50587.9973, 50518.1548, 50380.8624, 50330.0238] +25-08-31 08:07:23 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:07:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:07:23 | D | - sum error = [50111.8212, 50078.0881, 49963.7168, 49952.2178, 49917.6461] +25-08-31 08:07:23 | D | - best error = [50111.8212, 50078.0881, 49963.7168, 49952.2178, 49917.6461] +25-08-31 08:07:23 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:07:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:07:23 | D | - sum error = [49922.1897, 49999.6416, 49943.5970, 50058.2036, 50105.2486] +25-08-31 08:07:23 | D | - best error = [49917.6461, 49917.6461, 49917.6461, 49917.6461, 49917.6461] +25-08-31 08:07:23 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:07:23 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:07:23 | D | - sum error = [61938.9573, 59657.3705, 58065.4726, 56700.7065, 55623.3114] +25-08-31 08:07:23 | D | - best error = [49917.6461, 49917.6461, 49917.6461, 49917.6461, 49917.6461] +25-08-31 08:07:23 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:07:23 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:07:23 | D | - sum error = [54565.3745, 53589.6085, 52808.2781, 52479.7317, 51788.8142] +25-08-31 08:07:23 | D | - best error = [49917.6461, 49917.6461, 49917.6461, 49917.6461, 49917.6461] +25-08-31 08:07:23 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:07:23 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:07:23 | D | - sum error = [51337.4355, 51716.3761, 50824.3368, 50559.3721, 50283.3749] +25-08-31 08:07:23 | D | - best error = [49917.6461, 49917.6461, 49917.6461, 49917.6461, 49917.6461] +25-08-31 08:07:23 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:07:23 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:07:23 | D | - sum error = [50149.1408, 50068.3667, 50180.2422, 49983.0115] +25-08-31 08:07:23 | D | - best error = [49917.6461, 49917.6461, 49917.6461, 49917.6461] +25-08-31 08:07:23 | D | + error = 49917.6461 +25-08-31 08:07:23 | D | + scale = [min=0.2833, max=6.9644] +25-08-31 08:07:23 | D | - single_transformer_blocks.22.attn.out_proj +25-08-31 08:07:23 | D | + w: sfp4_e2m1_all +25-08-31 08:07:23 | D | + x: sfp4_e2m1_all +25-08-31 08:07:23 | D | + y: None +25-08-31 08:07:23 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:07:23 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:07:24 | D | + x - AbsMax +25-08-31 08:07:24 | D | + x = [min=1.2812, max=7.1562] +25-08-31 08:07:24 | D | + w - AbsMax +25-08-31 08:07:24 | D | + w = [min=0.1211, max=0.4121] +25-08-31 08:07:24 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:07:25 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:08:23 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:08:23 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:08:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:08:23 | D | - sum error = [ 4769.3247, 4779.0805, 4765.5184, 4763.1920, 4762.1663] +25-08-31 08:08:23 | D | - best error = [ 4769.3247, 4769.3247, 4765.5184, 4763.1920, 4762.1663] +25-08-31 08:08:23 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:08:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:08:23 | D | - sum error = [ 4758.8826, 4745.9598, 4741.6548, 4730.2404, 4732.0021] +25-08-31 08:08:23 | D | - best error = [ 4758.8826, 4745.9598, 4741.6548, 4730.2404, 4730.2404] +25-08-31 08:08:23 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:08:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:08:23 | D | - sum error = [ 4727.0299, 4717.5303, 4715.0262, 4704.5912, 4700.6510] +25-08-31 08:08:23 | D | - best error = [ 4727.0299, 4717.5303, 4715.0262, 4704.5912, 4700.6510] +25-08-31 08:08:23 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:08:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:08:23 | D | - sum error = [ 4697.0431, 4686.1575, 4681.9912, 4671.3058, 4660.8349] +25-08-31 08:08:23 | D | - best error = [ 4697.0431, 4686.1575, 4681.9912, 4671.3058, 4660.8349] +25-08-31 08:08:23 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:08:23 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:08:23 | D | - sum error = [ 4833.8734, 4817.3412, 4811.3246, 4808.4432, 4802.5100] +25-08-31 08:08:23 | D | - best error = [ 4660.8349, 4660.8349, 4660.8349, 4660.8349, 4660.8349] +25-08-31 08:08:23 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:08:23 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:08:23 | D | - sum error = [ 4804.0645, 4790.2236, 4783.2205, 4776.3710, 4761.4603] +25-08-31 08:08:23 | D | - best error = [ 4660.8349, 4660.8349, 4660.8349, 4660.8349, 4660.8349] +25-08-31 08:08:23 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:08:23 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:08:23 | D | - sum error = [ 4751.3278, 4750.8744, 4730.7555, 4725.0901, 4713.9548] +25-08-31 08:08:23 | D | - best error = [ 4660.8349, 4660.8349, 4660.8349, 4660.8349, 4660.8349] +25-08-31 08:08:23 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:08:23 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:08:23 | D | - sum error = [ 4699.8134, 4691.4927, 4683.4274, 4667.8282] +25-08-31 08:08:23 | D | - best error = [ 4660.8349, 4660.8349, 4660.8349, 4660.8349] +25-08-31 08:08:23 | D | + error = 4660.8349 +25-08-31 08:08:23 | D | + scale = [min=1.2655, max=6.4856] +25-08-31 08:08:23 | D | - single_transformer_blocks.22.down_proj +25-08-31 08:08:23 | D | + w: sfp4_e2m1_all +25-08-31 08:08:23 | D | + x: sfp4_e2m1_all +25-08-31 08:08:23 | D | + y: None +25-08-31 08:08:23 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:08:23 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:08:26 | D | + x - AbsMax +25-08-31 08:08:26 | D | + x = [min=0.7227, max=10.3125] +25-08-31 08:08:26 | D | + w - AbsMax +25-08-31 08:08:26 | D | + w = [min=0.1118, max=0.8711] +25-08-31 08:08:26 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:08:28 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:10:24 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:10:24 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:10:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:10:24 | D | - sum error = [ 5449.2822, 5451.9618, 5459.7441, 5457.8234, 5472.6887] +25-08-31 08:10:24 | D | - best error = [ 5449.2822, 5449.2822, 5449.2822, 5449.2822, 5449.2822] +25-08-31 08:10:24 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:10:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:10:24 | D | - sum error = [ 5482.7934, 5484.6429, 5490.2013, 5797.2970, 5515.0322] +25-08-31 08:10:24 | D | - best error = [ 5449.2822, 5449.2822, 5449.2822, 5449.2822, 5449.2822] +25-08-31 08:10:24 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:10:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:10:24 | D | - sum error = [ 5552.2316, 5542.2853, 5582.8133, 5587.6049, 5627.6286] +25-08-31 08:10:24 | D | - best error = [ 5449.2822, 5449.2822, 5449.2822, 5449.2822, 5449.2822] +25-08-31 08:10:24 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:10:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:10:24 | D | - sum error = [ 5650.8884, 5662.2520, 5697.9438, 5738.5561, 5789.3309] +25-08-31 08:10:24 | D | - best error = [ 5449.2822, 5449.2822, 5449.2822, 5449.2822, 5449.2822] +25-08-31 08:10:24 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:10:24 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:10:24 | D | - sum error = [ 5887.0067, 5842.2821, 5788.9312, 5746.2666, 5704.8132] +25-08-31 08:10:24 | D | - best error = [ 5449.2822, 5449.2822, 5449.2822, 5449.2822, 5449.2822] +25-08-31 08:10:24 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:10:24 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:10:24 | D | - sum error = [ 5658.3524, 5624.7679, 5608.3400, 5593.7024, 5586.7056] +25-08-31 08:10:24 | D | - best error = [ 5449.2822, 5449.2822, 5449.2822, 5449.2822, 5449.2822] +25-08-31 08:10:24 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:10:24 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:10:24 | D | - sum error = [ 5589.4993, 5599.1855, 5608.9717, 5625.0102, 5640.7070] +25-08-31 08:10:24 | D | - best error = [ 5449.2822, 5449.2822, 5449.2822, 5449.2822, 5449.2822] +25-08-31 08:10:24 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:10:24 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:10:24 | D | - sum error = [ 5662.4500, 5685.7647, 5724.8724, 5765.1350] +25-08-31 08:10:24 | D | - best error = [ 5449.2822, 5449.2822, 5449.2822, 5449.2822] +25-08-31 08:10:24 | D | + error = 5449.2822 +25-08-31 08:10:24 | D | + scale = [min=1.0000, max=1.0000] +25-08-31 08:10:43 | D | - Smoothing Diffusion Block single_transformer_blocks.23 +25-08-31 08:10:43 | D | - Skipping Module single_transformer_blocks.23.norm.linear +25-08-31 08:10:43 | D | - Smoothing Transformer Block single_transformer_blocks.23 +25-08-31 08:10:43 | D | - single_transformer_blocks.23.attn.qkv_proj + single_transformer_blocks.23.up_proj +25-08-31 08:10:43 | D | + w: sfp4_e2m1_all +25-08-31 08:10:43 | D | + x: sfp4_e2m1_all +25-08-31 08:10:43 | D | + y: None +25-08-31 08:10:43 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:10:43 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:10:43 | D | + x - AbsMax +25-08-31 08:10:43 | D | + x = [min=0.1836, max=22.7500] +25-08-31 08:10:43 | D | + w - AbsMax +25-08-31 08:10:43 | D | + w = [min=0.0913, max=0.7578] +25-08-31 08:10:43 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:10:44 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:12:53 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:12:53 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:12:53 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:12:53 | D | - sum error = [55731.4774, 54850.5274, 54032.2535, 53438.4072, 53163.3558] +25-08-31 08:12:53 | D | - best error = [55731.4774, 54850.5274, 54032.2535, 53438.4072, 53163.3558] +25-08-31 08:12:53 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:12:53 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:12:53 | D | - sum error = [52843.8945, 52555.8233, 52169.9622, 51845.1899, 51527.6988] +25-08-31 08:12:53 | D | - best error = [52843.8945, 52555.8233, 52169.9622, 51845.1899, 51527.6988] +25-08-31 08:12:53 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:12:53 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:12:53 | D | - sum error = [51457.1915, 51331.7555, 51148.4490, 51005.5357, 50959.4625] +25-08-31 08:12:53 | D | - best error = [51457.1915, 51331.7555, 51148.4490, 51005.5357, 50959.4625] +25-08-31 08:12:53 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:12:53 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:12:53 | D | - sum error = [50790.7015, 50947.2411, 51010.0727, 50912.5657, 50921.6056] +25-08-31 08:12:53 | D | - best error = [50790.7015, 50790.7015, 50790.7015, 50790.7015, 50790.7015] +25-08-31 08:12:53 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:12:53 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:12:53 | D | - sum error = [71682.0107, 68007.8000, 64965.3357, 62694.2090, 60597.3908] +25-08-31 08:12:53 | D | - best error = [50790.7015, 50790.7015, 50790.7015, 50790.7015, 50790.7015] +25-08-31 08:12:53 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:12:53 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:12:53 | D | - sum error = [59067.5773, 57323.0453, 56292.6343, 55251.9941, 53779.4869] +25-08-31 08:12:53 | D | - best error = [50790.7015, 50790.7015, 50790.7015, 50790.7015, 50790.7015] +25-08-31 08:12:53 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:12:53 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:12:53 | D | - sum error = [53029.0330, 52507.3343, 52427.7105, 51761.4912, 51569.6357] +25-08-31 08:12:53 | D | - best error = [50790.7015, 50790.7015, 50790.7015, 50790.7015, 50790.7015] +25-08-31 08:12:53 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:12:53 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:12:53 | D | - sum error = [51113.0762, 51087.0053, 51198.9650, 51180.2812] +25-08-31 08:12:53 | D | - best error = [50790.7015, 50790.7015, 50790.7015, 50790.7015] +25-08-31 08:12:53 | D | + error = 50790.7015 +25-08-31 08:12:53 | D | + scale = [min=0.2805, max=10.4168] +25-08-31 08:12:53 | D | - single_transformer_blocks.23.attn.out_proj +25-08-31 08:12:53 | D | + w: sfp4_e2m1_all +25-08-31 08:12:53 | D | + x: sfp4_e2m1_all +25-08-31 08:12:53 | D | + y: None +25-08-31 08:12:53 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:12:53 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:12:54 | D | + x - AbsMax +25-08-31 08:12:54 | D | + x = [min=0.8945, max=6.5312] +25-08-31 08:12:54 | D | + w - AbsMax +25-08-31 08:12:54 | D | + w = [min=0.1191, max=0.4102] +25-08-31 08:12:54 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:12:55 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:13:55 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:13:55 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:13:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:13:55 | D | - sum error = [ 4750.9085, 4733.7446, 4736.4333, 4745.5288, 4737.7750] +25-08-31 08:13:55 | D | - best error = [ 4750.9085, 4733.7446, 4733.7446, 4733.7446, 4733.7446] +25-08-31 08:13:55 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:13:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:13:55 | D | - sum error = [ 4734.7462, 4718.7038, 4738.6766, 4726.9578, 4723.6211] +25-08-31 08:13:55 | D | - best error = [ 4733.7446, 4718.7038, 4718.7038, 4718.7038, 4718.7038] +25-08-31 08:13:55 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:13:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:13:55 | D | - sum error = [ 4709.4000, 4721.6436, 4719.3351, 4724.1558, 4720.8137] +25-08-31 08:13:55 | D | - best error = [ 4709.4000, 4709.4000, 4709.4000, 4709.4000, 4709.4000] +25-08-31 08:13:55 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:13:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:13:55 | D | - sum error = [ 4730.5393, 4728.6436, 4728.8045, 4742.2851, 4751.8899] +25-08-31 08:13:55 | D | - best error = [ 4709.4000, 4709.4000, 4709.4000, 4709.4000, 4709.4000] +25-08-31 08:13:55 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:13:55 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:13:55 | D | - sum error = [ 4839.2371, 4820.4079, 4803.6846, 4793.9583, 4794.6531] +25-08-31 08:13:55 | D | - best error = [ 4709.4000, 4709.4000, 4709.4000, 4709.4000, 4709.4000] +25-08-31 08:13:55 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:13:55 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:13:55 | D | - sum error = [ 4792.6813, 4781.9226, 4782.1876, 4760.3765, 4754.2253] +25-08-31 08:13:55 | D | - best error = [ 4709.4000, 4709.4000, 4709.4000, 4709.4000, 4709.4000] +25-08-31 08:13:55 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:13:55 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:13:55 | D | - sum error = [ 4754.8287, 4755.5732, 4745.4136, 4735.5494, 4732.6939] +25-08-31 08:13:55 | D | - best error = [ 4709.4000, 4709.4000, 4709.4000, 4709.4000, 4709.4000] +25-08-31 08:13:55 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:13:55 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:13:55 | D | - sum error = [ 4756.2708, 4750.7949, 4747.0559, 4759.2945] +25-08-31 08:13:55 | D | - best error = [ 4709.4000, 4709.4000, 4709.4000, 4709.4000] +25-08-31 08:13:55 | D | + error = 4709.4000 +25-08-31 08:13:55 | D | + scale = [min=0.9458, max=2.5556] +25-08-31 08:13:55 | D | - single_transformer_blocks.23.down_proj +25-08-31 08:13:55 | D | + w: sfp4_e2m1_all +25-08-31 08:13:55 | D | + x: sfp4_e2m1_all +25-08-31 08:13:55 | D | + y: None +25-08-31 08:13:55 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:13:55 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:13:58 | D | + x - AbsMax +25-08-31 08:13:58 | D | + x = [min=0.5508, max=11.0000] +25-08-31 08:13:58 | D | + w - AbsMax +25-08-31 08:13:58 | D | + w = [min=0.1074, max=1.0547] +25-08-31 08:13:58 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:14:00 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:16:01 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:16:01 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:16:01 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:16:01 | D | - sum error = [ 5122.7612, 5138.8483, 5132.8168, 5184.6253, 5142.9077] +25-08-31 08:16:01 | D | - best error = [ 5122.7612, 5122.7612, 5122.7612, 5122.7612, 5122.7612] +25-08-31 08:16:01 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:16:01 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:16:01 | D | - sum error = [ 5156.4292, 5165.0564, 5166.1715, 5172.8018, 5177.4241] +25-08-31 08:16:01 | D | - best error = [ 5122.7612, 5122.7612, 5122.7612, 5122.7612, 5122.7612] +25-08-31 08:16:01 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:16:01 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:16:01 | D | - sum error = [ 5199.8606, 5221.4082, 5226.4638, 5246.6267, 5258.5350] +25-08-31 08:16:01 | D | - best error = [ 5122.7612, 5122.7612, 5122.7612, 5122.7612, 5122.7612] +25-08-31 08:16:01 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:16:01 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:16:01 | D | - sum error = [ 5293.5583, 5302.1020, 5325.9384, 5362.1507, 5396.9365] +25-08-31 08:16:01 | D | - best error = [ 5122.7612, 5122.7612, 5122.7612, 5122.7612, 5122.7612] +25-08-31 08:16:01 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:16:01 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:16:01 | D | - sum error = [ 5491.0726, 5887.9069, 5417.2460, 5378.8499, 5326.3646] +25-08-31 08:16:01 | D | - best error = [ 5122.7612, 5122.7612, 5122.7612, 5122.7612, 5122.7612] +25-08-31 08:16:01 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:16:01 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:16:01 | D | - sum error = [ 5281.7020, 5262.0496, 5233.2302, 5231.6967, 5232.4887] +25-08-31 08:16:01 | D | - best error = [ 5122.7612, 5122.7612, 5122.7612, 5122.7612, 5122.7612] +25-08-31 08:16:01 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:16:01 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:16:01 | D | - sum error = [ 5231.0139, 5234.2272, 5236.4339, 5246.9321, 5264.8966] +25-08-31 08:16:01 | D | - best error = [ 5122.7612, 5122.7612, 5122.7612, 5122.7612, 5122.7612] +25-08-31 08:16:01 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:16:01 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:16:01 | D | - sum error = [ 5304.5663, 5319.1830, 5352.5669, 5401.8030] +25-08-31 08:16:01 | D | - best error = [ 5122.7612, 5122.7612, 5122.7612, 5122.7612] +25-08-31 08:16:01 | D | + error = 5122.7612 +25-08-31 08:16:01 | D | + scale = [min=1.0000, max=1.0000] +25-08-31 08:16:21 | D | - Smoothing Diffusion Block single_transformer_blocks.24 +25-08-31 08:16:21 | D | - Skipping Module single_transformer_blocks.24.norm.linear +25-08-31 08:16:21 | D | - Smoothing Transformer Block single_transformer_blocks.24 +25-08-31 08:16:21 | D | - single_transformer_blocks.24.attn.qkv_proj + single_transformer_blocks.24.up_proj +25-08-31 08:16:21 | D | + w: sfp4_e2m1_all +25-08-31 08:16:21 | D | + x: sfp4_e2m1_all +25-08-31 08:16:21 | D | + y: None +25-08-31 08:16:21 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:16:21 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:16:21 | D | + x - AbsMax +25-08-31 08:16:21 | D | + x = [min=0.0986, max=19.1250] +25-08-31 08:16:21 | D | + w - AbsMax +25-08-31 08:16:21 | D | + w = [min=0.0781, max=1.0938] +25-08-31 08:16:21 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:16:22 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:18:30 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:18:30 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:18:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:18:30 | D | - sum error = [57293.9179, 56503.8952, 55932.3591, 55238.9312, 54754.0081] +25-08-31 08:18:30 | D | - best error = [57293.9179, 56503.8952, 55932.3591, 55238.9312, 54754.0081] +25-08-31 08:18:30 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:18:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:18:30 | D | - sum error = [54174.5729, 53815.3287, 53394.5316, 53064.3926, 52941.8628] +25-08-31 08:18:30 | D | - best error = [54174.5729, 53815.3287, 53394.5316, 53064.3926, 52941.8628] +25-08-31 08:18:30 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:18:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:18:30 | D | - sum error = [52936.3935, 52850.8410, 52962.2815, 52472.6824, 52327.2264] +25-08-31 08:18:30 | D | - best error = [52936.3935, 52850.8410, 52850.8410, 52472.6824, 52327.2264] +25-08-31 08:18:30 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:18:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:18:30 | D | - sum error = [52373.7656, 52336.2568, 52248.1660, 52314.9943, 52433.1889] +25-08-31 08:18:30 | D | - best error = [52327.2264, 52327.2264, 52248.1660, 52248.1660, 52248.1660] +25-08-31 08:18:30 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:18:30 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:18:30 | D | - sum error = [66541.3705, 64346.9854, 62310.0028, 60732.9378, 59100.4049] +25-08-31 08:18:30 | D | - best error = [52248.1660, 52248.1660, 52248.1660, 52248.1660, 52248.1660] +25-08-31 08:18:30 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:18:30 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:18:30 | D | - sum error = [57560.9201, 56706.1173, 55505.4874, 54847.0408, 54182.6859] +25-08-31 08:18:30 | D | - best error = [52248.1660, 52248.1660, 52248.1660, 52248.1660, 52248.1660] +25-08-31 08:18:30 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:18:30 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:18:30 | D | - sum error = [53676.2085, 53335.4210, 53100.7652, 52719.9296, 52651.4808] +25-08-31 08:18:30 | D | - best error = [52248.1660, 52248.1660, 52248.1660, 52248.1660, 52248.1660] +25-08-31 08:18:30 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:18:30 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:18:30 | D | - sum error = [52410.0729, 52330.5090, 52415.2527, 52321.1963] +25-08-31 08:18:30 | D | - best error = [52248.1660, 52248.1660, 52248.1660, 52248.1660] +25-08-31 08:18:30 | D | + error = 52248.1660 +25-08-31 08:18:30 | D | + scale = [min=0.1396, max=12.2846] +25-08-31 08:18:30 | D | - single_transformer_blocks.24.attn.out_proj +25-08-31 08:18:30 | D | + w: sfp4_e2m1_all +25-08-31 08:18:30 | D | + x: sfp4_e2m1_all +25-08-31 08:18:30 | D | + y: None +25-08-31 08:18:30 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:18:30 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:18:30 | D | + x - AbsMax +25-08-31 08:18:30 | D | + x = [min=0.8945, max=5.5312] +25-08-31 08:18:30 | D | + w - AbsMax +25-08-31 08:18:30 | D | + w = [min=0.1191, max=0.3711] +25-08-31 08:18:30 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:18:31 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:19:28 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:19:28 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:19:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:19:28 | D | - sum error = [ 4477.0367, 4463.5289, 4463.7861, 4462.4318, 4460.3359] +25-08-31 08:19:28 | D | - best error = [ 4477.0367, 4463.5289, 4463.5289, 4462.4318, 4460.3359] +25-08-31 08:19:28 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:19:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:19:28 | D | - sum error = [ 4444.0290, 4447.2520, 4438.3474, 4437.4928, 4440.8947] +25-08-31 08:19:28 | D | - best error = [ 4444.0290, 4444.0290, 4438.3474, 4437.4928, 4437.4928] +25-08-31 08:19:28 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:19:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:19:28 | D | - sum error = [ 4431.7958, 4439.8340, 4437.8544, 4426.5576, 4430.4923] +25-08-31 08:19:28 | D | - best error = [ 4431.7958, 4431.7958, 4431.7958, 4426.5576, 4426.5576] +25-08-31 08:19:28 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:19:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:19:28 | D | - sum error = [ 4440.8416, 4437.9446, 4448.5449, 4454.7417, 4456.2747] +25-08-31 08:19:28 | D | - best error = [ 4426.5576, 4426.5576, 4426.5576, 4426.5576, 4426.5576] +25-08-31 08:19:28 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:19:28 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:19:28 | D | - sum error = [ 4545.7646, 4536.5236, 4513.8751, 4495.5812, 4492.7390] +25-08-31 08:19:28 | D | - best error = [ 4426.5576, 4426.5576, 4426.5576, 4426.5576, 4426.5576] +25-08-31 08:19:28 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:19:28 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:19:28 | D | - sum error = [ 4503.5151, 4471.7648, 4474.1258, 4469.1131, 4456.6196] +25-08-31 08:19:28 | D | - best error = [ 4426.5576, 4426.5576, 4426.5576, 4426.5576, 4426.5576] +25-08-31 08:19:28 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:19:28 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:19:28 | D | - sum error = [ 4449.3471, 4446.3846, 4458.2615, 4462.4980, 4451.3679] +25-08-31 08:19:28 | D | - best error = [ 4426.5576, 4426.5576, 4426.5576, 4426.5576, 4426.5576] +25-08-31 08:19:28 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:19:28 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:19:28 | D | - sum error = [ 4448.5296, 4447.4163, 4449.4512, 4450.3537] +25-08-31 08:19:28 | D | - best error = [ 4426.5576, 4426.5576, 4426.5576, 4426.5576] +25-08-31 08:19:28 | D | + error = 4426.5576 +25-08-31 08:19:28 | D | + scale = [min=0.9301, max=3.0397] +25-08-31 08:19:28 | D | - single_transformer_blocks.24.down_proj +25-08-31 08:19:28 | D | + w: sfp4_e2m1_all +25-08-31 08:19:28 | D | + x: sfp4_e2m1_all +25-08-31 08:19:28 | D | + y: None +25-08-31 08:19:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:19:28 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:19:31 | D | + x - AbsMax +25-08-31 08:19:31 | D | + x = [min=1.2109, max=11.3750] +25-08-31 08:19:31 | D | + w - AbsMax +25-08-31 08:19:31 | D | + w = [min=0.1079, max=0.9570] +25-08-31 08:19:31 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:19:34 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:21:33 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:21:33 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:21:33 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:21:33 | D | - sum error = [ 5333.9066, 5349.8198, 5346.3180, 5355.0888, 5357.2000] +25-08-31 08:21:33 | D | - best error = [ 5333.9066, 5333.9066, 5333.9066, 5333.9066, 5333.9066] +25-08-31 08:21:33 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:21:33 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:21:33 | D | - sum error = [ 5366.4195, 5371.4315, 5389.4007, 5388.6891, 5388.7004] +25-08-31 08:21:33 | D | - best error = [ 5333.9066, 5333.9066, 5333.9066, 5333.9066, 5333.9066] +25-08-31 08:21:33 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:21:33 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:21:33 | D | - sum error = [ 5421.4856, 5420.2124, 5435.5042, 5448.5244, 5451.0685] +25-08-31 08:21:33 | D | - best error = [ 5333.9066, 5333.9066, 5333.9066, 5333.9066, 5333.9066] +25-08-31 08:21:33 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:21:33 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:21:33 | D | - sum error = [ 5485.8516, 5500.8219, 5525.6127, 5544.8562, 5574.1094] +25-08-31 08:21:33 | D | - best error = [ 5333.9066, 5333.9066, 5333.9066, 5333.9066, 5333.9066] +25-08-31 08:21:33 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:21:33 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:21:33 | D | - sum error = [ 6131.6383, 5715.4818, 5678.1742, 5640.6475, 5579.3234] +25-08-31 08:21:33 | D | - best error = [ 5333.9066, 5333.9066, 5333.9066, 5333.9066, 5333.9066] +25-08-31 08:21:33 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:21:33 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:21:33 | D | - sum error = [ 5541.4370, 5508.1598, 5492.0137, 5490.4798, 5491.7720] +25-08-31 08:21:33 | D | - best error = [ 5333.9066, 5333.9066, 5333.9066, 5333.9066, 5333.9066] +25-08-31 08:21:33 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:21:33 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:21:33 | D | - sum error = [ 5478.4803, 5482.9470, 5479.0231, 5476.5168, 5484.5115] +25-08-31 08:21:33 | D | - best error = [ 5333.9066, 5333.9066, 5333.9066, 5333.9066, 5333.9066] +25-08-31 08:21:33 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:21:33 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:21:33 | D | - sum error = [ 5496.7698, 5516.8342, 5550.4598, 5571.0800] +25-08-31 08:21:33 | D | - best error = [ 5333.9066, 5333.9066, 5333.9066, 5333.9066] +25-08-31 08:21:33 | D | + error = 5333.9066 +25-08-31 08:21:33 | D | + scale = [min=1.0000, max=1.0000] +25-08-31 08:21:54 | D | - Smoothing Diffusion Block single_transformer_blocks.25 +25-08-31 08:21:54 | D | - Skipping Module single_transformer_blocks.25.norm.linear +25-08-31 08:21:54 | D | - Smoothing Transformer Block single_transformer_blocks.25 +25-08-31 08:21:54 | D | - single_transformer_blocks.25.attn.qkv_proj + single_transformer_blocks.25.up_proj +25-08-31 08:21:54 | D | + w: sfp4_e2m1_all +25-08-31 08:21:54 | D | + x: sfp4_e2m1_all +25-08-31 08:21:54 | D | + y: None +25-08-31 08:21:54 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:21:54 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:21:54 | D | + x - AbsMax +25-08-31 08:21:54 | D | + x = [min=0.3125, max=18.5000] +25-08-31 08:21:54 | D | + w - AbsMax +25-08-31 08:21:54 | D | + w = [min=0.1206, max=1.1562] +25-08-31 08:21:54 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:21:55 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:24:02 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:24:02 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:24:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:24:02 | D | - sum error = [66302.7490, 65744.5159, 65313.5326, 65066.0848, 64378.3775] +25-08-31 08:24:02 | D | - best error = [66302.7490, 65744.5159, 65313.5326, 65066.0848, 64378.3775] +25-08-31 08:24:02 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:24:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:24:02 | D | - sum error = [64185.3117, 63738.8753, 63610.4183, 63212.1831, 63300.7925] +25-08-31 08:24:02 | D | - best error = [64185.3117, 63738.8753, 63610.4183, 63212.1831, 63212.1831] +25-08-31 08:24:02 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:24:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:24:02 | D | - sum error = [63255.3277, 62853.2564, 62869.6378, 62867.9642, 62822.2452] +25-08-31 08:24:02 | D | - best error = [63212.1831, 62853.2564, 62853.2564, 62853.2564, 62822.2452] +25-08-31 08:24:02 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:24:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:24:02 | D | - sum error = [62742.6571, 62684.2041, 62686.8586, 62604.1665, 62721.8530] +25-08-31 08:24:02 | D | - best error = [62742.6571, 62684.2041, 62684.2041, 62604.1665, 62604.1665] +25-08-31 08:24:02 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:24:02 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:24:02 | D | - sum error = [77901.1788, 75380.6375, 72870.8192, 71254.5732, 69771.6321] +25-08-31 08:24:02 | D | - best error = [62604.1665, 62604.1665, 62604.1665, 62604.1665, 62604.1665] +25-08-31 08:24:02 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:24:02 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:24:02 | D | - sum error = [68208.0094, 66965.0904, 66441.8451, 65456.1447, 64745.0948] +25-08-31 08:24:02 | D | - best error = [62604.1665, 62604.1665, 62604.1665, 62604.1665, 62604.1665] +25-08-31 08:24:02 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:24:02 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:24:02 | D | - sum error = [64381.8528, 63770.6089, 63568.0081, 63175.0432, 63081.5173] +25-08-31 08:24:02 | D | - best error = [62604.1665, 62604.1665, 62604.1665, 62604.1665, 62604.1665] +25-08-31 08:24:02 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:24:02 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:24:02 | D | - sum error = [62862.3609, 63051.4264, 62627.4072, 62628.5006] +25-08-31 08:24:02 | D | - best error = [62604.1665, 62604.1665, 62604.1665, 62604.1665] +25-08-31 08:24:02 | D | + error = 62604.1665 +25-08-31 08:24:02 | D | + scale = [min=0.3510, max=13.8183] +25-08-31 08:24:02 | D | - single_transformer_blocks.25.attn.out_proj +25-08-31 08:24:02 | D | + w: sfp4_e2m1_all +25-08-31 08:24:02 | D | + x: sfp4_e2m1_all +25-08-31 08:24:02 | D | + y: None +25-08-31 08:24:02 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:24:02 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:24:03 | D | + x - AbsMax +25-08-31 08:24:03 | D | + x = [min=1.0156, max=7.8750] +25-08-31 08:24:03 | D | + w - AbsMax +25-08-31 08:24:03 | D | + w = [min=0.1094, max=0.4648] +25-08-31 08:24:03 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:24:04 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:25:01 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:25:01 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:25:01 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:25:01 | D | - sum error = [ 5692.3969, 5676.4887, 5692.5985, 5678.9335, 5682.8739] +25-08-31 08:25:01 | D | - best error = [ 5692.3969, 5676.4887, 5676.4887, 5676.4887, 5676.4887] +25-08-31 08:25:01 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:25:01 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:25:01 | D | - sum error = [ 5664.0743, 5656.5444, 5659.3099, 5651.9053, 5647.8045] +25-08-31 08:25:01 | D | - best error = [ 5664.0743, 5656.5444, 5656.5444, 5651.9053, 5647.8045] +25-08-31 08:25:01 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:25:01 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:25:01 | D | - sum error = [ 5651.3540, 5649.4502, 5648.3240, 5656.1066, 5633.0674] +25-08-31 08:25:01 | D | - best error = [ 5647.8045, 5647.8045, 5647.8045, 5647.8045, 5633.0674] +25-08-31 08:25:01 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:25:01 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:25:01 | D | - sum error = [ 5638.3824, 5648.1707, 5654.5472, 5646.6716, 5658.3019] +25-08-31 08:25:01 | D | - best error = [ 5633.0674, 5633.0674, 5633.0674, 5633.0674, 5633.0674] +25-08-31 08:25:01 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:25:01 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:25:01 | D | - sum error = [ 5788.0470, 5767.3197, 5761.3766, 5743.1450, 5735.1485] +25-08-31 08:25:01 | D | - best error = [ 5633.0674, 5633.0674, 5633.0674, 5633.0674, 5633.0674] +25-08-31 08:25:01 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:25:01 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:25:01 | D | - sum error = [ 5719.9895, 5714.5339, 5712.2580, 5685.0887, 5679.2573] +25-08-31 08:25:01 | D | - best error = [ 5633.0674, 5633.0674, 5633.0674, 5633.0674, 5633.0674] +25-08-31 08:25:01 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:25:01 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:25:01 | D | - sum error = [ 5690.9971, 5662.9943, 5656.0778, 5657.6767, 5644.3873] +25-08-31 08:25:01 | D | - best error = [ 5633.0674, 5633.0674, 5633.0674, 5633.0674, 5633.0674] +25-08-31 08:25:01 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:25:01 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:25:01 | D | - sum error = [ 5663.4640, 5664.8809, 5660.8194, 5666.3357] +25-08-31 08:25:01 | D | - best error = [ 5633.0674, 5633.0674, 5633.0674, 5633.0674] +25-08-31 08:25:01 | D | + error = 5633.0674 +25-08-31 08:25:01 | D | + scale = [min=1.0109, max=4.2401] +25-08-31 08:25:02 | D | - single_transformer_blocks.25.down_proj +25-08-31 08:25:02 | D | + w: sfp4_e2m1_all +25-08-31 08:25:02 | D | + x: sfp4_e2m1_all +25-08-31 08:25:02 | D | + y: None +25-08-31 08:25:02 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:25:02 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:25:04 | D | + x - AbsMax +25-08-31 08:25:04 | D | + x = [min=0.7578, max=12.5625] +25-08-31 08:25:04 | D | + w - AbsMax +25-08-31 08:25:04 | D | + w = [min=0.1108, max=1.3359] +25-08-31 08:25:04 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:25:07 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:27:03 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:27:03 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:27:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:27:03 | D | - sum error = [ 5547.5634, 5556.6116, 5570.1007, 5561.0236, 5558.9161] +25-08-31 08:27:03 | D | - best error = [ 5547.5634, 5547.5634, 5547.5634, 5547.5634, 5547.5634] +25-08-31 08:27:03 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:27:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:27:03 | D | - sum error = [ 5551.7282, 5586.9102, 5583.9756, 5626.3445, 5608.9559] +25-08-31 08:27:03 | D | - best error = [ 5547.5634, 5547.5634, 5547.5634, 5547.5634, 5547.5634] +25-08-31 08:27:03 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:27:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:27:03 | D | - sum error = [ 5625.9477, 5627.7365, 5635.0409, 5655.8055, 5680.5783] +25-08-31 08:27:03 | D | - best error = [ 5547.5634, 5547.5634, 5547.5634, 5547.5634, 5547.5634] +25-08-31 08:27:03 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:27:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:27:03 | D | - sum error = [ 5696.3864, 5720.7057, 5749.0872, 5783.5328, 5821.3672] +25-08-31 08:27:03 | D | - best error = [ 5547.5634, 5547.5634, 5547.5634, 5547.5634, 5547.5634] +25-08-31 08:27:03 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:27:03 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:27:03 | D | - sum error = [ 6079.2427, 6017.6135, 5952.9549, 5890.4764, 5831.9150] +25-08-31 08:27:03 | D | - best error = [ 5547.5634, 5547.5634, 5547.5634, 5547.5634, 5547.5634] +25-08-31 08:27:03 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:27:03 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:27:03 | D | - sum error = [ 5814.2181, 5759.0791, 5728.6845, 5706.0992, 5702.0993] +25-08-31 08:27:03 | D | - best error = [ 5547.5634, 5547.5634, 5547.5634, 5547.5634, 5547.5634] +25-08-31 08:27:03 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:27:03 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:27:03 | D | - sum error = [ 5684.3809, 5760.7364, 5683.7067, 5696.1907, 5708.7216] +25-08-31 08:27:03 | D | - best error = [ 5547.5634, 5547.5634, 5547.5634, 5547.5634, 5547.5634] +25-08-31 08:27:03 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:27:03 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:27:03 | D | - sum error = [ 5712.4347, 5741.7309, 5763.8331, 5823.5965] +25-08-31 08:27:03 | D | - best error = [ 5547.5634, 5547.5634, 5547.5634, 5547.5634] +25-08-31 08:27:03 | D | + error = 5547.5634 +25-08-31 08:27:03 | D | + scale = [min=1.0000, max=1.0000] +25-08-31 08:27:21 | D | - Smoothing Diffusion Block single_transformer_blocks.26 +25-08-31 08:27:21 | D | - Skipping Module single_transformer_blocks.26.norm.linear +25-08-31 08:27:21 | D | - Smoothing Transformer Block single_transformer_blocks.26 +25-08-31 08:27:21 | D | - single_transformer_blocks.26.attn.qkv_proj + single_transformer_blocks.26.up_proj +25-08-31 08:27:21 | D | + w: sfp4_e2m1_all +25-08-31 08:27:21 | D | + x: sfp4_e2m1_all +25-08-31 08:27:21 | D | + y: None +25-08-31 08:27:21 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:27:21 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:27:22 | D | + x - AbsMax +25-08-31 08:27:22 | D | + x = [min=0.4102, max=20.3750] +25-08-31 08:27:22 | D | + w - AbsMax +25-08-31 08:27:22 | D | + w = [min=0.0713, max=0.9023] +25-08-31 08:27:22 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:27:23 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:29:30 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:29:30 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:29:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:29:30 | D | - sum error = [66437.7994, 65545.5306, 64867.0784, 64124.5688, 64101.8789] +25-08-31 08:29:30 | D | - best error = [66437.7994, 65545.5306, 64867.0784, 64124.5688, 64101.8789] +25-08-31 08:29:30 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:29:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:29:30 | D | - sum error = [63588.9359, 62947.1906, 62632.3244, 62560.6240, 62620.5191] +25-08-31 08:29:30 | D | - best error = [63588.9359, 62947.1906, 62632.3244, 62560.6240, 62560.6240] +25-08-31 08:29:30 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:29:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:29:30 | D | - sum error = [62676.5503, 61960.0491, 61993.8350, 62071.4365, 61933.2740] +25-08-31 08:29:30 | D | - best error = [62560.6240, 61960.0491, 61960.0491, 61960.0491, 61933.2740] +25-08-31 08:29:30 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:29:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:29:30 | D | - sum error = [61868.1850, 61981.9225, 62038.6378, 62106.9034, 62091.1619] +25-08-31 08:29:30 | D | - best error = [61868.1850, 61868.1850, 61868.1850, 61868.1850, 61868.1850] +25-08-31 08:29:30 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:29:30 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:29:30 | D | - sum error = [77207.8736, 74428.0790, 72217.4388, 70724.0325, 68932.4715] +25-08-31 08:29:30 | D | - best error = [61868.1850, 61868.1850, 61868.1850, 61868.1850, 61868.1850] +25-08-31 08:29:30 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:29:30 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:29:30 | D | - sum error = [67453.0777, 66502.3019, 65422.7928, 64536.8662, 64041.7204] +25-08-31 08:29:30 | D | - best error = [61868.1850, 61868.1850, 61868.1850, 61868.1850, 61868.1850] +25-08-31 08:29:30 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:29:30 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:29:30 | D | - sum error = [63292.7481, 62810.7975, 62519.9237, 62343.4944, 62273.1881] +25-08-31 08:29:30 | D | - best error = [61868.1850, 61868.1850, 61868.1850, 61868.1850, 61868.1850] +25-08-31 08:29:30 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:29:30 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:29:30 | D | - sum error = [62237.1290, 62033.7931, 62114.3101, 61974.6549] +25-08-31 08:29:30 | D | - best error = [61868.1850, 61868.1850, 61868.1850, 61868.1850] +25-08-31 08:29:30 | D | + error = 61868.1850 +25-08-31 08:29:30 | D | + scale = [min=0.5125, max=9.5901] +25-08-31 08:29:31 | D | - single_transformer_blocks.26.attn.out_proj +25-08-31 08:29:31 | D | + w: sfp4_e2m1_all +25-08-31 08:29:31 | D | + x: sfp4_e2m1_all +25-08-31 08:29:31 | D | + y: None +25-08-31 08:29:31 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:29:31 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:29:31 | D | + x - AbsMax +25-08-31 08:29:31 | D | + x = [min=0.8086, max=6.4062] +25-08-31 08:29:31 | D | + w - AbsMax +25-08-31 08:29:31 | D | + w = [min=0.1084, max=0.4355] +25-08-31 08:29:31 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:29:32 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:30:29 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:30:29 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:30:29 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:30:29 | D | - sum error = [ 4693.2198, 4679.3771, 4676.5757, 4669.3012, 4671.9199] +25-08-31 08:30:29 | D | - best error = [ 4693.2198, 4679.3771, 4676.5757, 4669.3012, 4669.3012] +25-08-31 08:30:29 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:30:29 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:30:29 | D | - sum error = [ 4663.5182, 4657.6415, 4653.7961, 4667.3637, 4657.2601] +25-08-31 08:30:29 | D | - best error = [ 4663.5182, 4657.6415, 4653.7961, 4653.7961, 4653.7961] +25-08-31 08:30:29 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:30:29 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:30:29 | D | - sum error = [ 4655.6173, 4638.5034, 4654.0844, 4653.1435, 4650.0323] +25-08-31 08:30:29 | D | - best error = [ 4653.7961, 4638.5034, 4638.5034, 4638.5034, 4638.5034] +25-08-31 08:30:29 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:30:29 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:30:29 | D | - sum error = [ 4659.9326, 4659.5686, 4650.5058, 4674.8844, 4689.3771] +25-08-31 08:30:29 | D | - best error = [ 4638.5034, 4638.5034, 4638.5034, 4638.5034, 4638.5034] +25-08-31 08:30:29 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:30:29 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:30:29 | D | - sum error = [ 4782.1439, 4757.9708, 4747.6773, 4738.9672, 4736.7563] +25-08-31 08:30:29 | D | - best error = [ 4638.5034, 4638.5034, 4638.5034, 4638.5034, 4638.5034] +25-08-31 08:30:29 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:30:29 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:30:29 | D | - sum error = [ 4720.6468, 4695.6650, 4699.7085, 4677.5630, 4687.6056] +25-08-31 08:30:29 | D | - best error = [ 4638.5034, 4638.5034, 4638.5034, 4638.5034, 4638.5034] +25-08-31 08:30:29 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:30:29 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:30:29 | D | - sum error = [ 4674.5410, 4679.2999, 4672.7022, 4671.5736, 4676.2265] +25-08-31 08:30:29 | D | - best error = [ 4638.5034, 4638.5034, 4638.5034, 4638.5034, 4638.5034] +25-08-31 08:30:29 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:30:29 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:30:29 | D | - sum error = [ 4666.4003, 4673.5008, 4681.5976, 4687.1697] +25-08-31 08:30:29 | D | - best error = [ 4638.5034, 4638.5034, 4638.5034, 4638.5034] +25-08-31 08:30:29 | D | + error = 4638.5034 +25-08-31 08:30:29 | D | + scale = [min=0.8897, max=2.7774] +25-08-31 08:30:30 | D | - single_transformer_blocks.26.down_proj +25-08-31 08:30:30 | D | + w: sfp4_e2m1_all +25-08-31 08:30:30 | D | + x: sfp4_e2m1_all +25-08-31 08:30:30 | D | + y: None +25-08-31 08:30:30 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:30:30 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:30:32 | D | + x - AbsMax +25-08-31 08:30:32 | D | + x = [min=1.0312, max=12.7500] +25-08-31 08:30:32 | D | + w - AbsMax +25-08-31 08:30:32 | D | + w = [min=0.1104, max=1.4531] +25-08-31 08:30:32 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:30:34 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:32:39 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:32:39 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:32:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:32:39 | D | - sum error = [ 5449.2678, 5457.4417, 5459.4443, 5466.6769, 5692.2488] +25-08-31 08:32:39 | D | - best error = [ 5449.2678, 5449.2678, 5449.2678, 5449.2678, 5449.2678] +25-08-31 08:32:39 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:32:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:32:39 | D | - sum error = [ 5478.3634, 5485.3946, 5491.2022, 5494.8800, 5519.7007] +25-08-31 08:32:39 | D | - best error = [ 5449.2678, 5449.2678, 5449.2678, 5449.2678, 5449.2678] +25-08-31 08:32:39 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:32:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:32:39 | D | - sum error = [ 5530.1642, 5537.6105, 5543.9592, 5564.1312, 5597.5673] +25-08-31 08:32:39 | D | - best error = [ 5449.2678, 5449.2678, 5449.2678, 5449.2678, 5449.2678] +25-08-31 08:32:39 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:32:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:32:39 | D | - sum error = [ 5621.6083, 5644.9478, 5669.2843, 5717.0692, 5758.0945] +25-08-31 08:32:39 | D | - best error = [ 5449.2678, 5449.2678, 5449.2678, 5449.2678, 5449.2678] +25-08-31 08:32:39 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:32:39 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:32:39 | D | - sum error = [ 6228.1496, 6142.1026, 6056.6129, 5974.8603, 5894.4839] +25-08-31 08:32:39 | D | - best error = [ 5449.2678, 5449.2678, 5449.2678, 5449.2678, 5449.2678] +25-08-31 08:32:39 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:32:39 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:32:39 | D | - sum error = [ 5837.8712, 5779.5145, 5752.6161, 5732.4375, 5687.6336] +25-08-31 08:32:39 | D | - best error = [ 5449.2678, 5449.2678, 5449.2678, 5449.2678, 5449.2678] +25-08-31 08:32:39 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:32:39 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:32:39 | D | - sum error = [ 5676.9698, 5651.7396, 5660.5109, 5652.5667, 5647.9400] +25-08-31 08:32:39 | D | - best error = [ 5449.2678, 5449.2678, 5449.2678, 5449.2678, 5449.2678] +25-08-31 08:32:39 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:32:39 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:32:39 | D | - sum error = [ 5675.2045, 5690.0070, 5716.2169, 5741.1248] +25-08-31 08:32:39 | D | - best error = [ 5449.2678, 5449.2678, 5449.2678, 5449.2678] +25-08-31 08:32:39 | D | + error = 5449.2678 +25-08-31 08:32:39 | D | + scale = [min=1.0000, max=1.0000] +25-08-31 08:32:59 | D | - Smoothing Diffusion Block single_transformer_blocks.27 +25-08-31 08:32:59 | D | - Skipping Module single_transformer_blocks.27.norm.linear +25-08-31 08:32:59 | D | - Smoothing Transformer Block single_transformer_blocks.27 +25-08-31 08:32:59 | D | - single_transformer_blocks.27.attn.qkv_proj + single_transformer_blocks.27.up_proj +25-08-31 08:32:59 | D | + w: sfp4_e2m1_all +25-08-31 08:32:59 | D | + x: sfp4_e2m1_all +25-08-31 08:32:59 | D | + y: None +25-08-31 08:32:59 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:32:59 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:33:00 | D | + x - AbsMax +25-08-31 08:33:00 | D | + x = [min=1.1172, max=18.2500] +25-08-31 08:33:00 | D | + w - AbsMax +25-08-31 08:33:00 | D | + w = [min=0.1079, max=0.7500] +25-08-31 08:33:00 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:33:01 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:35:11 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:35:11 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:35:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:35:11 | D | - sum error = [77277.8699, 76505.4853, 75803.7247, 75193.9640, 74451.7326] +25-08-31 08:35:11 | D | - best error = [77277.8699, 76505.4853, 75803.7247, 75193.9640, 74451.7326] +25-08-31 08:35:11 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:35:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:35:11 | D | - sum error = [73850.9322, 73278.3406, 72907.4793, 72733.1913, 72598.1201] +25-08-31 08:35:11 | D | - best error = [73850.9322, 73278.3406, 72907.4793, 72733.1913, 72598.1201] +25-08-31 08:35:11 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:35:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:35:11 | D | - sum error = [72058.3373, 72057.7049, 71723.3940, 71903.9822, 71931.2027] +25-08-31 08:35:11 | D | - best error = [72058.3373, 72057.7049, 71723.3940, 71723.3940, 71723.3940] +25-08-31 08:35:11 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:35:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:35:11 | D | - sum error = [72056.5147, 71752.0816, 72080.2255, 72245.3136, 72339.2859] +25-08-31 08:35:11 | D | - best error = [71723.3940, 71723.3940, 71723.3940, 71723.3940, 71723.3940] +25-08-31 08:35:11 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:35:11 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:35:11 | D | - sum error = [98659.3110, 94232.7685, 89790.1183, 86536.6682, 84157.6912] +25-08-31 08:35:11 | D | - best error = [71723.3940, 71723.3940, 71723.3940, 71723.3940, 71723.3940] +25-08-31 08:35:11 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:35:11 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:35:11 | D | - sum error = [81544.4580, 79751.0366, 77742.2531, 76425.9525, 75257.8773] +25-08-31 08:35:11 | D | - best error = [71723.3940, 71723.3940, 71723.3940, 71723.3940, 71723.3940] +25-08-31 08:35:11 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:35:11 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:35:11 | D | - sum error = [74208.3691, 73550.8184, 73066.4346, 72908.1289, 72404.6301] +25-08-31 08:35:11 | D | - best error = [71723.3940, 71723.3940, 71723.3940, 71723.3940, 71723.3940] +25-08-31 08:35:11 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:35:11 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:35:11 | D | - sum error = [72105.3807, 71928.9890, 72152.2869, 72390.5002] +25-08-31 08:35:11 | D | - best error = [71723.3940, 71723.3940, 71723.3940, 71723.3940] +25-08-31 08:35:11 | D | + error = 71723.3940 +25-08-31 08:35:11 | D | + scale = [min=1.0687, max=5.7116] +25-08-31 08:35:11 | D | - single_transformer_blocks.27.attn.out_proj +25-08-31 08:35:11 | D | + w: sfp4_e2m1_all +25-08-31 08:35:11 | D | + x: sfp4_e2m1_all +25-08-31 08:35:11 | D | + y: None +25-08-31 08:35:11 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:35:11 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:35:12 | D | + x - AbsMax +25-08-31 08:35:12 | D | + x = [min=0.8477, max=9.6250] +25-08-31 08:35:12 | D | + w - AbsMax +25-08-31 08:35:12 | D | + w = [min=0.1128, max=0.4258] +25-08-31 08:35:12 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:35:13 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:36:17 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:36:17 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:36:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:36:17 | D | - sum error = [ 6573.5762, 6532.1586, 6471.0506, 6467.9261, 6450.0639] +25-08-31 08:36:17 | D | - best error = [ 6573.5762, 6532.1586, 6471.0506, 6467.9261, 6450.0639] +25-08-31 08:36:17 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:36:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:36:17 | D | - sum error = [ 6409.2502, 6381.2218, 6350.2624, 6324.3276, 6298.0117] +25-08-31 08:36:17 | D | - best error = [ 6409.2502, 6381.2218, 6350.2624, 6324.3276, 6298.0117] +25-08-31 08:36:17 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:36:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:36:17 | D | - sum error = [ 6278.3918, 6261.9483, 6226.4542, 6225.4568, 6216.9642] +25-08-31 08:36:17 | D | - best error = [ 6278.3918, 6261.9483, 6226.4542, 6225.4568, 6216.9642] +25-08-31 08:36:17 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:36:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:36:17 | D | - sum error = [ 6227.0934, 6236.5058, 6214.8735, 6237.7102, 6248.5675] +25-08-31 08:36:17 | D | - best error = [ 6216.9642, 6216.9642, 6214.8735, 6214.8735, 6214.8735] +25-08-31 08:36:17 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:36:17 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:36:17 | D | - sum error = [ 6630.4000, 6583.6385, 6530.1253, 6517.8217, 6456.2955] +25-08-31 08:36:17 | D | - best error = [ 6214.8735, 6214.8735, 6214.8735, 6214.8735, 6214.8735] +25-08-31 08:36:17 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:36:17 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:36:17 | D | - sum error = [ 6430.6699, 6404.0294, 6350.2161, 6347.4455, 6297.3826] +25-08-31 08:36:17 | D | - best error = [ 6214.8735, 6214.8735, 6214.8735, 6214.8735, 6214.8735] +25-08-31 08:36:17 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:36:17 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:36:17 | D | - sum error = [ 6268.7268, 6266.1098, 6233.9650, 6216.1760, 6225.8352] +25-08-31 08:36:17 | D | - best error = [ 6214.8735, 6214.8735, 6214.8735, 6214.8735, 6214.8735] +25-08-31 08:36:17 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:36:17 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:36:17 | D | - sum error = [ 6206.3361, 6220.3177, 6233.4296, 6232.3244] +25-08-31 08:36:17 | D | - best error = [ 6206.3361, 6206.3361, 6206.3361, 6206.3361] +25-08-31 08:36:17 | D | + error = 6206.3361 +25-08-31 08:36:17 | D | + scale = [min=1.2781, max=8.4335] +25-08-31 08:36:17 | D | - single_transformer_blocks.27.down_proj +25-08-31 08:36:17 | D | + w: sfp4_e2m1_all +25-08-31 08:36:17 | D | + x: sfp4_e2m1_all +25-08-31 08:36:17 | D | + y: None +25-08-31 08:36:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:36:17 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:36:20 | D | + x - AbsMax +25-08-31 08:36:20 | D | + x = [min=1.1406, max=12.0000] +25-08-31 08:36:20 | D | + w - AbsMax +25-08-31 08:36:20 | D | + w = [min=0.1113, max=0.8086] +25-08-31 08:36:20 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 08:36:22 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 08:38:27 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:38:27 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:38:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:38:27 | D | - sum error = [ 5704.6055, 5709.2034, 5688.7775, 5686.9407, 5699.2587] +25-08-31 08:38:27 | D | - best error = [ 5704.6055, 5704.6055, 5688.7775, 5686.9407, 5686.9407] +25-08-31 08:38:27 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:38:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:38:27 | D | - sum error = [ 5716.7235, 5703.8903, 5732.8328, 5736.5875, 5756.1025] +25-08-31 08:38:27 | D | - best error = [ 5686.9407, 5686.9407, 5686.9407, 5686.9407, 5686.9407] +25-08-31 08:38:27 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:38:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:38:27 | D | - sum error = [ 5745.8213, 5809.5150, 5783.7675, 5811.0027, 5844.8052] +25-08-31 08:38:27 | D | - best error = [ 5686.9407, 5686.9407, 5686.9407, 5686.9407, 5686.9407] +25-08-31 08:38:27 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:38:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:38:27 | D | - sum error = [ 5890.7779, 5917.8293, 5966.2367, 5991.7553, 6068.9086] +25-08-31 08:38:27 | D | - best error = [ 5686.9407, 5686.9407, 5686.9407, 5686.9407, 5686.9407] +25-08-31 08:38:27 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:38:27 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:38:27 | D | - sum error = [ 6185.6909, 6121.2848, 6072.2414, 6032.4393, 5986.2446] +25-08-31 08:38:27 | D | - best error = [ 5686.9407, 5686.9407, 5686.9407, 5686.9407, 5686.9407] +25-08-31 08:38:27 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:38:27 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:38:27 | D | - sum error = [ 5931.6770, 5904.3174, 5853.4605, 5835.2426, 5855.3483] +25-08-31 08:38:27 | D | - best error = [ 5686.9407, 5686.9407, 5686.9407, 5686.9407, 5686.9407] +25-08-31 08:38:27 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:38:27 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:38:27 | D | - sum error = [ 5835.6426, 5839.8199, 5855.8757, 5852.3787, 5880.2789] +25-08-31 08:38:27 | D | - best error = [ 5686.9407, 5686.9407, 5686.9407, 5686.9407, 5686.9407] +25-08-31 08:38:27 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:38:27 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:38:27 | D | - sum error = [ 5908.4905, 5946.1847, 5986.2930, 6026.5652] +25-08-31 08:38:27 | D | - best error = [ 5686.9407, 5686.9407, 5686.9407, 5686.9407] +25-08-31 08:38:27 | D | + error = 5686.9407 +25-08-31 08:38:27 | D | + scale = [min=1.0199, max=1.4517] +25-08-31 08:38:46 | D | - Smoothing Diffusion Block single_transformer_blocks.28 +25-08-31 08:38:46 | D | - Skipping Module single_transformer_blocks.28.norm.linear +25-08-31 08:38:46 | D | - Smoothing Transformer Block single_transformer_blocks.28 +25-08-31 08:38:46 | D | - single_transformer_blocks.28.attn.qkv_proj + single_transformer_blocks.28.up_proj +25-08-31 08:38:46 | D | + w: sfp4_e2m1_all +25-08-31 08:38:46 | D | + x: sfp4_e2m1_all +25-08-31 08:38:46 | D | + y: None +25-08-31 08:38:46 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:38:46 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:38:47 | D | + x - AbsMax +25-08-31 08:38:47 | D | + x = [min=0.4766, max=31.8750] +25-08-31 08:38:47 | D | + w - AbsMax +25-08-31 08:38:47 | D | + w = [min=0.1553, max=1.8750] +25-08-31 08:38:47 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:38:48 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:40:57 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:40:57 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:40:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:40:57 | D | - sum error = [77412.0677, 76351.2322, 75531.4161, 74936.2934, 74396.1978] +25-08-31 08:40:57 | D | - best error = [77412.0677, 76351.2322, 75531.4161, 74936.2934, 74396.1978] +25-08-31 08:40:57 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:40:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:40:57 | D | - sum error = [73811.4458, 73219.4754, 72985.9848, 72580.2397, 72376.6009] +25-08-31 08:40:57 | D | - best error = [73811.4458, 73219.4754, 72985.9848, 72580.2397, 72376.6009] +25-08-31 08:40:57 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:40:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:40:57 | D | - sum error = [72123.7798, 72230.1211, 72058.8062, 71959.4126, 72182.8150] +25-08-31 08:40:57 | D | - best error = [72123.7798, 72123.7798, 72058.8062, 71959.4126, 71959.4126] +25-08-31 08:40:57 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:40:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:40:57 | D | - sum error = [72187.6772, 72021.1311, 72382.2564, 72497.5590, 72788.8132] +25-08-31 08:40:57 | D | - best error = [71959.4126, 71959.4126, 71959.4126, 71959.4126, 71959.4126] +25-08-31 08:40:57 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:40:57 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:40:57 | D | - sum error = [110213.5655, 104450.3039, 99596.6830, 94341.1559, 90447.4410] +25-08-31 08:40:57 | D | - best error = [71959.4126, 71959.4126, 71959.4126, 71959.4126, 71959.4126] +25-08-31 08:40:57 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:40:57 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:40:57 | D | - sum error = [86711.1464, 84080.7566, 81393.7094, 79386.6407, 77980.1919] +25-08-31 08:40:57 | D | - best error = [71959.4126, 71959.4126, 71959.4126, 71959.4126, 71959.4126] +25-08-31 08:40:57 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:40:57 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:40:57 | D | - sum error = [76507.5498, 75075.9828, 74231.1581, 73647.7237, 73008.0333] +25-08-31 08:40:57 | D | - best error = [71959.4126, 71959.4126, 71959.4126, 71959.4126, 71959.4126] +25-08-31 08:40:57 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:40:57 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:40:57 | D | - sum error = [73112.4933, 72518.5805, 72627.5478, 72684.5573] +25-08-31 08:40:57 | D | - best error = [71959.4126, 71959.4126, 71959.4126, 71959.4126] +25-08-31 08:40:57 | D | + error = 71959.4126 +25-08-31 08:40:57 | D | + scale = [min=0.6177, max=9.4895] +25-08-31 08:40:57 | D | - single_transformer_blocks.28.attn.out_proj +25-08-31 08:40:57 | D | + w: sfp4_e2m1_all +25-08-31 08:40:57 | D | + x: sfp4_e2m1_all +25-08-31 08:40:57 | D | + y: None +25-08-31 08:40:57 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:40:57 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:40:58 | D | + x - AbsMax +25-08-31 08:40:58 | D | + x = [min=0.7539, max=6.8125] +25-08-31 08:40:58 | D | + w - AbsMax +25-08-31 08:40:58 | D | + w = [min=0.1074, max=0.4863] +25-08-31 08:40:58 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:40:58 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:41:59 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:41:59 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:41:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:41:59 | D | - sum error = [ 4969.5897, 4954.1970, 4953.1483, 4958.8918, 4949.7374] +25-08-31 08:41:59 | D | - best error = [ 4969.5897, 4954.1970, 4953.1483, 4953.1483, 4949.7374] +25-08-31 08:41:59 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:41:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:41:59 | D | - sum error = [ 4938.5848, 4935.4556, 4930.6450, 4923.6547, 4922.3110] +25-08-31 08:41:59 | D | - best error = [ 4938.5848, 4935.4556, 4930.6450, 4923.6547, 4922.3110] +25-08-31 08:41:59 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:41:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:41:59 | D | - sum error = [ 4916.3942, 4924.4571, 4908.9292, 4914.8656, 4915.7262] +25-08-31 08:41:59 | D | - best error = [ 4916.3942, 4916.3942, 4908.9292, 4908.9292, 4908.9292] +25-08-31 08:41:59 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:41:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:41:59 | D | - sum error = [ 4912.0204, 4904.5960, 4910.7270, 4913.3323, 4916.6448] +25-08-31 08:41:59 | D | - best error = [ 4908.9292, 4904.5960, 4904.5960, 4904.5960, 4904.5960] +25-08-31 08:41:59 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:41:59 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:41:59 | D | - sum error = [ 5054.6438, 5039.0904, 5027.7609, 5002.7918, 4991.1754] +25-08-31 08:41:59 | D | - best error = [ 4904.5960, 4904.5960, 4904.5960, 4904.5960, 4904.5960] +25-08-31 08:41:59 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:41:59 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:41:59 | D | - sum error = [ 4989.8986, 4969.4239, 4973.4336, 4954.5975, 4944.6258] +25-08-31 08:41:59 | D | - best error = [ 4904.5960, 4904.5960, 4904.5960, 4904.5960, 4904.5960] +25-08-31 08:41:59 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:41:59 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:41:59 | D | - sum error = [ 4949.3379, 4938.8576, 4925.7440, 4904.7836, 4915.2583] +25-08-31 08:41:59 | D | - best error = [ 4904.5960, 4904.5960, 4904.5960, 4904.5960, 4904.5960] +25-08-31 08:41:59 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:41:59 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:41:59 | D | - sum error = [ 4902.5404, 4904.1095, 4914.7407, 4922.2105] +25-08-31 08:41:59 | D | - best error = [ 4902.5404, 4902.5404, 4902.5404, 4902.5404] +25-08-31 08:41:59 | D | + error = 4902.5404 +25-08-31 08:41:59 | D | + scale = [min=1.0267, max=6.2195] +25-08-31 08:41:59 | D | - single_transformer_blocks.28.down_proj +25-08-31 08:41:59 | D | + w: sfp4_e2m1_all +25-08-31 08:41:59 | D | + x: sfp4_e2m1_all +25-08-31 08:41:59 | D | + y: None +25-08-31 08:41:59 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:41:59 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:42:01 | D | + x - AbsMax +25-08-31 08:42:01 | D | + x = [min=0.4004, max=14.6250] +25-08-31 08:42:01 | D | + w - AbsMax +25-08-31 08:42:01 | D | + w = [min=0.1055, max=0.8867] +25-08-31 08:42:01 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:42:03 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:43:57 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:43:57 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:43:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:43:57 | D | - sum error = [ 5212.1163, 5217.8172, 5207.6974, 5206.3480, 5204.0932] +25-08-31 08:43:57 | D | - best error = [ 5212.1163, 5212.1163, 5207.6974, 5206.3480, 5204.0932] +25-08-31 08:43:57 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:43:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:43:57 | D | - sum error = [ 5207.3594, 5201.0614, 5205.4145, 5255.0117, 5213.3448] +25-08-31 08:43:57 | D | - best error = [ 5204.0932, 5201.0614, 5201.0614, 5201.0614, 5201.0614] +25-08-31 08:43:57 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:43:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:43:57 | D | - sum error = [ 5213.1867, 5229.9375, 5233.7136, 5274.2611, 5279.9265] +25-08-31 08:43:57 | D | - best error = [ 5201.0614, 5201.0614, 5201.0614, 5201.0614, 5201.0614] +25-08-31 08:43:57 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:43:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:43:57 | D | - sum error = [ 5292.4193, 5330.6549, 5338.6504, 5374.9035, 5420.9695] +25-08-31 08:43:57 | D | - best error = [ 5201.0614, 5201.0614, 5201.0614, 5201.0614, 5201.0614] +25-08-31 08:43:57 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:43:57 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:43:57 | D | - sum error = [ 6110.9737, 5548.9525, 5502.6515, 5456.4987, 5393.3110] +25-08-31 08:43:57 | D | - best error = [ 5201.0614, 5201.0614, 5201.0614, 5201.0614, 5201.0614] +25-08-31 08:43:57 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:43:57 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:43:57 | D | - sum error = [ 5354.8733, 5318.1617, 5288.4266, 5270.0592, 5270.1408] +25-08-31 08:43:57 | D | - best error = [ 5201.0614, 5201.0614, 5201.0614, 5201.0614, 5201.0614] +25-08-31 08:43:57 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:43:57 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:43:57 | D | - sum error = [ 5255.7892, 5248.2258, 5249.7535, 5269.9431, 5275.9359] +25-08-31 08:43:57 | D | - best error = [ 5201.0614, 5201.0614, 5201.0614, 5201.0614, 5201.0614] +25-08-31 08:43:57 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:43:57 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:43:57 | D | - sum error = [ 5295.1323, 5340.6846, 5358.4664, 5407.8175] +25-08-31 08:43:57 | D | - best error = [ 5201.0614, 5201.0614, 5201.0614, 5201.0614] +25-08-31 08:43:57 | D | + error = 5201.0614 +25-08-31 08:43:57 | D | + scale = [min=0.7599, max=2.2363] +25-08-31 08:44:15 | D | - Smoothing Diffusion Block single_transformer_blocks.29 +25-08-31 08:44:15 | D | - Skipping Module single_transformer_blocks.29.norm.linear +25-08-31 08:44:15 | D | - Smoothing Transformer Block single_transformer_blocks.29 +25-08-31 08:44:15 | D | - single_transformer_blocks.29.attn.qkv_proj + single_transformer_blocks.29.up_proj +25-08-31 08:44:15 | D | + w: sfp4_e2m1_all +25-08-31 08:44:15 | D | + x: sfp4_e2m1_all +25-08-31 08:44:15 | D | + y: None +25-08-31 08:44:15 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:44:15 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:44:15 | D | + x - AbsMax +25-08-31 08:44:15 | D | + x = [min=0.4297, max=25.0000] +25-08-31 08:44:15 | D | + w - AbsMax +25-08-31 08:44:15 | D | + w = [min=0.1055, max=0.6914] +25-08-31 08:44:15 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:44:17 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:46:25 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:46:25 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:46:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:46:25 | D | - sum error = [75725.8145, 74283.1564, 73455.9216, 72637.1030, 71960.5545] +25-08-31 08:46:25 | D | - best error = [75725.8145, 74283.1564, 73455.9216, 72637.1030, 71960.5545] +25-08-31 08:46:25 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:46:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:46:25 | D | - sum error = [71174.8126, 70951.1913, 70312.9134, 69668.1671, 69718.4243] +25-08-31 08:46:25 | D | - best error = [71174.8126, 70951.1913, 70312.9134, 69668.1671, 69668.1671] +25-08-31 08:46:25 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:46:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:46:25 | D | - sum error = [69549.1934, 69480.6149, 69650.9129, 69513.9146, 69450.2075] +25-08-31 08:46:25 | D | - best error = [69549.1934, 69480.6149, 69480.6149, 69480.6149, 69450.2075] +25-08-31 08:46:25 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:46:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:46:25 | D | - sum error = [69632.3015, 69730.0339, 70116.5713, 70207.4749, 70371.2632] +25-08-31 08:46:25 | D | - best error = [69450.2075, 69450.2075, 69450.2075, 69450.2075, 69450.2075] +25-08-31 08:46:25 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:46:25 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:46:25 | D | - sum error = [90315.7356, 86633.1196, 83684.6010, 80529.5720, 78408.5220] +25-08-31 08:46:25 | D | - best error = [69450.2075, 69450.2075, 69450.2075, 69450.2075, 69450.2075] +25-08-31 08:46:25 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:46:25 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:46:25 | D | - sum error = [76755.5471, 74656.2538, 73227.8382, 72424.5262, 71402.6384] +25-08-31 08:46:25 | D | - best error = [69450.2075, 69450.2075, 69450.2075, 69450.2075, 69450.2075] +25-08-31 08:46:25 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:46:25 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:46:25 | D | - sum error = [70939.3413, 70325.8082, 69734.1530, 69887.8974, 69557.0001] +25-08-31 08:46:25 | D | - best error = [69450.2075, 69450.2075, 69450.2075, 69450.2075, 69450.2075] +25-08-31 08:46:25 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:46:25 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:46:25 | D | - sum error = [69783.6400, 69802.0754, 70098.5254, 70363.6888] +25-08-31 08:46:25 | D | - best error = [69450.2075, 69450.2075, 69450.2075, 69450.2075] +25-08-31 08:46:25 | D | + error = 69450.2075 +25-08-31 08:46:25 | D | + scale = [min=0.5536, max=9.5183] +25-08-31 08:46:25 | D | - single_transformer_blocks.29.attn.out_proj +25-08-31 08:46:25 | D | + w: sfp4_e2m1_all +25-08-31 08:46:25 | D | + x: sfp4_e2m1_all +25-08-31 08:46:25 | D | + y: None +25-08-31 08:46:25 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:46:25 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:46:26 | D | + x - AbsMax +25-08-31 08:46:26 | D | + x = [min=0.4863, max=9.0625] +25-08-31 08:46:26 | D | + w - AbsMax +25-08-31 08:46:26 | D | + w = [min=0.0674, max=0.4180] +25-08-31 08:46:26 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:46:26 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:47:27 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:47:27 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:47:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:47:27 | D | - sum error = [ 5124.0861, 5102.7228, 5068.9983, 5044.6841, 5023.5671] +25-08-31 08:47:27 | D | - best error = [ 5124.0861, 5102.7228, 5068.9983, 5044.6841, 5023.5671] +25-08-31 08:47:27 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:47:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:47:27 | D | - sum error = [ 4997.1070, 4995.5953, 4963.7927, 4961.3776, 4944.4021] +25-08-31 08:47:27 | D | - best error = [ 4997.1070, 4995.5953, 4963.7927, 4961.3776, 4944.4021] +25-08-31 08:47:27 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:47:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:47:27 | D | - sum error = [ 4943.6367, 4944.6608, 4941.3786, 4938.8655, 4916.7168] +25-08-31 08:47:27 | D | - best error = [ 4943.6367, 4943.6367, 4941.3786, 4938.8655, 4916.7168] +25-08-31 08:47:27 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:47:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:47:27 | D | - sum error = [ 4939.0327, 4928.6801, 4940.1844, 4940.9244, 4959.4546] +25-08-31 08:47:27 | D | - best error = [ 4916.7168, 4916.7168, 4916.7168, 4916.7168, 4916.7168] +25-08-31 08:47:27 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:47:27 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:47:27 | D | - sum error = [ 5152.9995, 5128.2317, 5108.3919, 5068.2105, 5021.9556] +25-08-31 08:47:27 | D | - best error = [ 4916.7168, 4916.7168, 4916.7168, 4916.7168, 4916.7168] +25-08-31 08:47:27 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:47:27 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:47:27 | D | - sum error = [ 5001.3262, 4973.7045, 4965.7612, 4943.5791, 4950.5842] +25-08-31 08:47:27 | D | - best error = [ 4916.7168, 4916.7168, 4916.7168, 4916.7168, 4916.7168] +25-08-31 08:47:27 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:47:27 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:47:27 | D | - sum error = [ 4928.7510, 4933.2636, 4924.7700, 4919.5152, 4927.6331] +25-08-31 08:47:27 | D | - best error = [ 4916.7168, 4916.7168, 4916.7168, 4916.7168, 4916.7168] +25-08-31 08:47:27 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:47:27 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:47:27 | D | - sum error = [ 4918.5120, 4945.5827, 4951.7470, 4948.3060] +25-08-31 08:47:27 | D | - best error = [ 4916.7168, 4916.7168, 4916.7168, 4916.7168] +25-08-31 08:47:27 | D | + error = 4916.7168 +25-08-31 08:47:27 | D | + scale = [min=0.6037, max=4.6781] +25-08-31 08:47:27 | D | - single_transformer_blocks.29.down_proj +25-08-31 08:47:27 | D | + w: sfp4_e2m1_all +25-08-31 08:47:27 | D | + x: sfp4_e2m1_all +25-08-31 08:47:27 | D | + y: None +25-08-31 08:47:27 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:47:27 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:47:29 | D | + x - AbsMax +25-08-31 08:47:29 | D | + x = [min=0.8672, max=11.0625] +25-08-31 08:47:29 | D | + w - AbsMax +25-08-31 08:47:29 | D | + w = [min=0.1118, max=1.1094] +25-08-31 08:47:29 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:47:31 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:49:27 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:49:27 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:49:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:49:27 | D | - sum error = [ 5732.4196, 5717.1517, 5711.1053, 5708.2465, 5701.0090] +25-08-31 08:49:27 | D | - best error = [ 5732.4196, 5717.1517, 5711.1053, 5708.2465, 5701.0090] +25-08-31 08:49:27 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:49:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:49:27 | D | - sum error = [ 5699.0483, 5691.3534, 5695.6868, 5699.9777, 6008.7137] +25-08-31 08:49:27 | D | - best error = [ 5699.0483, 5691.3534, 5691.3534, 5691.3534, 5691.3534] +25-08-31 08:49:27 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:49:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:49:27 | D | - sum error = [ 5704.6700, 5718.0374, 5728.8920, 5752.0278, 5761.8306] +25-08-31 08:49:27 | D | - best error = [ 5691.3534, 5691.3534, 5691.3534, 5691.3534, 5691.3534] +25-08-31 08:49:27 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:49:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:49:27 | D | - sum error = [ 5781.4792, 5802.8203, 5827.3582, 5856.9487, 5902.4795] +25-08-31 08:49:27 | D | - best error = [ 5691.3534, 5691.3534, 5691.3534, 5691.3534, 5691.3534] +25-08-31 08:49:27 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:49:27 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:49:27 | D | - sum error = [ 6105.0982, 6032.2536, 5970.4298, 5924.1901, 5858.5981] +25-08-31 08:49:27 | D | - best error = [ 5691.3534, 5691.3534, 5691.3534, 5691.3534, 5691.3534] +25-08-31 08:49:27 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:49:27 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:49:27 | D | - sum error = [ 5826.2842, 5792.4774, 5763.3704, 5765.2285, 5756.7770] +25-08-31 08:49:27 | D | - best error = [ 5691.3534, 5691.3534, 5691.3534, 5691.3534, 5691.3534] +25-08-31 08:49:27 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:49:27 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:49:27 | D | - sum error = [ 5727.2173, 5805.4140, 5725.5772, 5750.8747, 5752.3684] +25-08-31 08:49:27 | D | - best error = [ 5691.3534, 5691.3534, 5691.3534, 5691.3534, 5691.3534] +25-08-31 08:49:27 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:49:27 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:49:27 | D | - sum error = [ 5794.9751, 5821.4902, 5851.8339, 5899.4332] +25-08-31 08:49:27 | D | - best error = [ 5691.3534, 5691.3534, 5691.3534, 5691.3534] +25-08-31 08:49:27 | D | + error = 5691.3534 +25-08-31 08:49:27 | D | + scale = [min=0.9582, max=2.0566] +25-08-31 08:49:45 | D | - Smoothing Diffusion Block single_transformer_blocks.30 +25-08-31 08:49:45 | D | - Skipping Module single_transformer_blocks.30.norm.linear +25-08-31 08:49:45 | D | - Smoothing Transformer Block single_transformer_blocks.30 +25-08-31 08:49:45 | D | - single_transformer_blocks.30.attn.qkv_proj + single_transformer_blocks.30.up_proj +25-08-31 08:49:45 | D | + w: sfp4_e2m1_all +25-08-31 08:49:45 | D | + x: sfp4_e2m1_all +25-08-31 08:49:45 | D | + y: None +25-08-31 08:49:45 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:49:45 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:49:46 | D | + x - AbsMax +25-08-31 08:49:46 | D | + x = [min=0.7578, max=24.2500] +25-08-31 08:49:46 | D | + w - AbsMax +25-08-31 08:49:46 | D | + w = [min=0.0928, max=1.5859] +25-08-31 08:49:46 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:49:47 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:51:56 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:51:56 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:51:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:51:56 | D | - sum error = [96732.4379, 95417.0943, 94732.6316, 93788.5445, 92729.6782] +25-08-31 08:51:56 | D | - best error = [96732.4379, 95417.0943, 94732.6316, 93788.5445, 92729.6782] +25-08-31 08:51:56 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:51:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:51:56 | D | - sum error = [92088.3888, 91636.6026, 90976.9680, 90589.5019, 90392.8719] +25-08-31 08:51:56 | D | - best error = [92088.3888, 91636.6026, 90976.9680, 90589.5019, 90392.8719] +25-08-31 08:51:56 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:51:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:51:56 | D | - sum error = [90221.2293, 90281.1112, 89959.0463, 90009.8171, 89956.4510] +25-08-31 08:51:56 | D | - best error = [90221.2293, 90221.2293, 89959.0463, 89959.0463, 89956.4510] +25-08-31 08:51:56 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:51:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:51:56 | D | - sum error = [90110.9574, 90091.9267, 90107.4175, 90410.4603, 90899.6542] +25-08-31 08:51:56 | D | - best error = [89956.4510, 89956.4510, 89956.4510, 89956.4510, 89956.4510] +25-08-31 08:51:56 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:51:56 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:51:56 | D | - sum error = [122362.8200, 117297.4743, 112752.7300, 109362.6534, 105785.6453] +25-08-31 08:51:56 | D | - best error = [89956.4510, 89956.4510, 89956.4510, 89956.4510, 89956.4510] +25-08-31 08:51:56 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:51:56 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:51:56 | D | - sum error = [102816.9760, 100032.2099, 98396.5346, 96415.2831, 94882.6765] +25-08-31 08:51:56 | D | - best error = [89956.4510, 89956.4510, 89956.4510, 89956.4510, 89956.4510] +25-08-31 08:51:56 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:51:56 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:51:56 | D | - sum error = [93367.5390, 92804.7116, 91988.5711, 91474.5789, 91117.9701] +25-08-31 08:51:56 | D | - best error = [89956.4510, 89956.4510, 89956.4510, 89956.4510, 89956.4510] +25-08-31 08:51:56 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:51:56 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:51:56 | D | - sum error = [90704.6393, 90907.2044, 90529.7193, 90835.9724] +25-08-31 08:51:56 | D | - best error = [89956.4510, 89956.4510, 89956.4510, 89956.4510] +25-08-31 08:51:56 | D | + error = 89956.4510 +25-08-31 08:51:56 | D | + scale = [min=0.8236, max=9.3175] +25-08-31 08:51:56 | D | - single_transformer_blocks.30.attn.out_proj +25-08-31 08:51:56 | D | + w: sfp4_e2m1_all +25-08-31 08:51:56 | D | + x: sfp4_e2m1_all +25-08-31 08:51:56 | D | + y: None +25-08-31 08:51:56 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:51:56 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:51:57 | D | + x - AbsMax +25-08-31 08:51:57 | D | + x = [min=0.7305, max=10.0000] +25-08-31 08:51:57 | D | + w - AbsMax +25-08-31 08:51:57 | D | + w = [min=0.1030, max=0.3574] +25-08-31 08:51:57 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:51:58 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:52:57 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:52:57 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:52:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:52:57 | D | - sum error = [ 6078.6518, 6073.6075, 6056.6390, 6035.7783, 6025.7720] +25-08-31 08:52:57 | D | - best error = [ 6078.6518, 6073.6075, 6056.6390, 6035.7783, 6025.7720] +25-08-31 08:52:57 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:52:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:52:57 | D | - sum error = [ 6001.1039, 5986.6578, 5967.5305, 5937.2859, 5936.5532] +25-08-31 08:52:57 | D | - best error = [ 6001.1039, 5986.6578, 5967.5305, 5937.2859, 5936.5532] +25-08-31 08:52:57 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:52:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:52:57 | D | - sum error = [ 5913.6618, 5892.8180, 5877.9425, 5864.4222, 5869.3536] +25-08-31 08:52:57 | D | - best error = [ 5913.6618, 5892.8180, 5877.9425, 5864.4222, 5864.4222] +25-08-31 08:52:57 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:52:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:52:57 | D | - sum error = [ 5858.3940, 5846.6455, 5847.9196, 5861.3118, 5878.3460] +25-08-31 08:52:57 | D | - best error = [ 5858.3940, 5846.6455, 5846.6455, 5846.6455, 5846.6455] +25-08-31 08:52:57 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:52:57 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:52:57 | D | - sum error = [ 6128.6664, 6110.7054, 6076.6306, 6070.8187, 6033.1836] +25-08-31 08:52:57 | D | - best error = [ 5846.6455, 5846.6455, 5846.6455, 5846.6455, 5846.6455] +25-08-31 08:52:57 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:52:57 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:52:57 | D | - sum error = [ 6029.8109, 5989.8187, 5983.8977, 5959.8601, 5943.9354] +25-08-31 08:52:57 | D | - best error = [ 5846.6455, 5846.6455, 5846.6455, 5846.6455, 5846.6455] +25-08-31 08:52:57 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:52:57 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:52:57 | D | - sum error = [ 5922.0027, 5891.6521, 5888.3298, 5870.0780, 5855.6147] +25-08-31 08:52:57 | D | - best error = [ 5846.6455, 5846.6455, 5846.6455, 5846.6455, 5846.6455] +25-08-31 08:52:57 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:52:57 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:52:57 | D | - sum error = [ 5856.2421, 5863.3523, 5867.3756, 5868.9229] +25-08-31 08:52:57 | D | - best error = [ 5846.6455, 5846.6455, 5846.6455, 5846.6455] +25-08-31 08:52:57 | D | + error = 5846.6455 +25-08-31 08:52:57 | D | + scale = [min=0.7778, max=6.3096] +25-08-31 08:52:57 | D | - single_transformer_blocks.30.down_proj +25-08-31 08:52:57 | D | + w: sfp4_e2m1_all +25-08-31 08:52:57 | D | + x: sfp4_e2m1_all +25-08-31 08:52:57 | D | + y: None +25-08-31 08:52:57 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:52:57 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:52:59 | D | + x - AbsMax +25-08-31 08:52:59 | D | + x = [min=0.5547, max=14.5625] +25-08-31 08:52:59 | D | + w - AbsMax +25-08-31 08:52:59 | D | + w = [min=0.1069, max=1.3984] +25-08-31 08:52:59 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:53:01 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:55:01 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:55:01 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:55:01 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:55:01 | D | - sum error = [ 5661.0635, 5647.5379, 5625.3599, 5626.8098, 5621.8131] +25-08-31 08:55:01 | D | - best error = [ 5661.0635, 5647.5379, 5625.3599, 5625.3599, 5621.8131] +25-08-31 08:55:01 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:55:01 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:55:01 | D | - sum error = [ 5601.4143, 5603.2159, 5633.5310, 5618.3029, 5635.7676] +25-08-31 08:55:01 | D | - best error = [ 5601.4143, 5601.4143, 5601.4143, 5601.4143, 5601.4143] +25-08-31 08:55:01 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:55:01 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:55:01 | D | - sum error = [ 5630.1723, 5636.1513, 5649.0544, 5679.6804, 5688.7441] +25-08-31 08:55:01 | D | - best error = [ 5601.4143, 5601.4143, 5601.4143, 5601.4143, 5601.4143] +25-08-31 08:55:01 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:55:01 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:55:01 | D | - sum error = [ 5705.2856, 5714.7175, 5749.6676, 5802.3444, 5852.2221] +25-08-31 08:55:01 | D | - best error = [ 5601.4143, 5601.4143, 5601.4143, 5601.4143, 5601.4143] +25-08-31 08:55:01 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:55:01 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:55:01 | D | - sum error = [ 6154.5190, 6071.6196, 5982.9625, 5907.2955, 5830.8833] +25-08-31 08:55:01 | D | - best error = [ 5601.4143, 5601.4143, 5601.4143, 5601.4143, 5601.4143] +25-08-31 08:55:01 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:55:01 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:55:01 | D | - sum error = [ 5843.5181, 5729.8773, 5695.1685, 5675.9977, 5650.8215] +25-08-31 08:55:01 | D | - best error = [ 5601.4143, 5601.4143, 5601.4143, 5601.4143, 5601.4143] +25-08-31 08:55:01 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:55:01 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:55:01 | D | - sum error = [ 5718.9417, 6045.2411, 5662.7663, 5672.5148, 5673.4638] +25-08-31 08:55:01 | D | - best error = [ 5601.4143, 5601.4143, 5601.4143, 5601.4143, 5601.4143] +25-08-31 08:55:01 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:55:01 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:55:01 | D | - sum error = [ 5682.2209, 5729.9068, 5776.7306, 5842.7983] +25-08-31 08:55:01 | D | - best error = [ 5601.4143, 5601.4143, 5601.4143, 5601.4143] +25-08-31 08:55:01 | D | + error = 5601.4143 +25-08-31 08:55:01 | D | + scale = [min=0.8630, max=1.9535] +25-08-31 08:55:20 | D | - Smoothing Diffusion Block single_transformer_blocks.31 +25-08-31 08:55:20 | D | - Skipping Module single_transformer_blocks.31.norm.linear +25-08-31 08:55:20 | D | - Smoothing Transformer Block single_transformer_blocks.31 +25-08-31 08:55:20 | D | - single_transformer_blocks.31.attn.qkv_proj + single_transformer_blocks.31.up_proj +25-08-31 08:55:20 | D | + w: sfp4_e2m1_all +25-08-31 08:55:20 | D | + x: sfp4_e2m1_all +25-08-31 08:55:20 | D | + y: None +25-08-31 08:55:20 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:55:20 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:55:21 | D | + x - AbsMax +25-08-31 08:55:21 | D | + x = [min=0.2266, max=29.1250] +25-08-31 08:55:21 | D | + w - AbsMax +25-08-31 08:55:21 | D | + w = [min=0.1011, max=0.7383] +25-08-31 08:55:21 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:55:22 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:57:32 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:57:32 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:57:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:57:32 | D | - sum error = [103286.1606, 101704.8706, 99886.3995, 98932.6669, 97562.0753] +25-08-31 08:57:32 | D | - best error = [103286.1606, 101704.8706, 99886.3995, 98932.6669, 97562.0753] +25-08-31 08:57:32 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:57:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:57:32 | D | - sum error = [96820.6365, 95938.3802, 95224.4997, 94740.7004, 94428.1070] +25-08-31 08:57:32 | D | - best error = [96820.6365, 95938.3802, 95224.4997, 94740.7004, 94428.1070] +25-08-31 08:57:32 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:57:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:57:32 | D | - sum error = [94301.9803, 93504.1095, 93495.5178, 93604.2392, 93482.8504] +25-08-31 08:57:32 | D | - best error = [94301.9803, 93504.1095, 93495.5178, 93495.5178, 93482.8504] +25-08-31 08:57:32 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:57:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:57:32 | D | - sum error = [93255.3463, 93483.4529, 93895.8252, 93574.5825, 94339.7665] +25-08-31 08:57:32 | D | - best error = [93255.3463, 93255.3463, 93255.3463, 93255.3463, 93255.3463] +25-08-31 08:57:32 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:57:32 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:57:32 | D | - sum error = [130457.1516, 123952.7797, 119158.7666, 114189.9184, 109947.1058] +25-08-31 08:57:32 | D | - best error = [93255.3463, 93255.3463, 93255.3463, 93255.3463, 93255.3463] +25-08-31 08:57:32 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:57:32 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:57:32 | D | - sum error = [106999.5068, 103543.7970, 101150.0411, 99224.2757, 97754.4535] +25-08-31 08:57:32 | D | - best error = [93255.3463, 93255.3463, 93255.3463, 93255.3463, 93255.3463] +25-08-31 08:57:32 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:57:32 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:57:32 | D | - sum error = [95906.7280, 95266.1122, 94433.6482, 93929.4142, 93673.1557] +25-08-31 08:57:32 | D | - best error = [93255.3463, 93255.3463, 93255.3463, 93255.3463, 93255.3463] +25-08-31 08:57:32 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:57:32 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:57:32 | D | - sum error = [93507.8767, 93419.7458, 93079.2268, 94007.1936] +25-08-31 08:57:32 | D | - best error = [93255.3463, 93255.3463, 93079.2268, 93079.2268] +25-08-31 08:57:32 | D | + error = 93079.2268 +25-08-31 08:57:32 | D | + scale = [min=0.3078, max=23.4128] +25-08-31 08:57:32 | D | - single_transformer_blocks.31.attn.out_proj +25-08-31 08:57:32 | D | + w: sfp4_e2m1_all +25-08-31 08:57:32 | D | + x: sfp4_e2m1_all +25-08-31 08:57:32 | D | + y: None +25-08-31 08:57:32 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:57:32 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:57:33 | D | + x - AbsMax +25-08-31 08:57:33 | D | + x = [min=0.7461, max=9.0000] +25-08-31 08:57:33 | D | + w - AbsMax +25-08-31 08:57:33 | D | + w = [min=0.0972, max=0.3652] +25-08-31 08:57:33 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 08:57:33 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 08:58:35 | D | - x / w range = AbsMax / AbsMax +25-08-31 08:58:35 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 08:58:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:58:35 | D | - sum error = [ 5339.7266, 5343.7448, 5308.9280, 5311.6852, 5327.8951] +25-08-31 08:58:35 | D | - best error = [ 5339.7266, 5339.7266, 5308.9280, 5308.9280, 5308.9280] +25-08-31 08:58:35 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 08:58:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:58:35 | D | - sum error = [ 5285.1369, 5279.2342, 5281.9707, 5277.0687, 5226.4697] +25-08-31 08:58:35 | D | - best error = [ 5285.1369, 5279.2342, 5279.2342, 5277.0687, 5226.4697] +25-08-31 08:58:35 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 08:58:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:58:35 | D | - sum error = [ 5262.3117, 5229.2738, 5249.7373, 5234.0120, 5216.1720] +25-08-31 08:58:35 | D | - best error = [ 5226.4697, 5226.4697, 5226.4697, 5226.4697, 5216.1720] +25-08-31 08:58:35 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:58:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 08:58:35 | D | - sum error = [ 5240.1721, 5231.2939, 5239.0911, 5227.9985, 5246.8139] +25-08-31 08:58:35 | D | - best error = [ 5216.1720, 5216.1720, 5216.1720, 5216.1720, 5216.1720] +25-08-31 08:58:35 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 08:58:35 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 08:58:35 | D | - sum error = [ 5380.5821, 5357.3828, 5330.2520, 5334.2452, 5306.2101] +25-08-31 08:58:35 | D | - best error = [ 5216.1720, 5216.1720, 5216.1720, 5216.1720, 5216.1720] +25-08-31 08:58:35 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 08:58:35 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 08:58:35 | D | - sum error = [ 5297.4824, 5262.4570, 5264.1493, 5268.4763, 5252.0217] +25-08-31 08:58:35 | D | - best error = [ 5216.1720, 5216.1720, 5216.1720, 5216.1720, 5216.1720] +25-08-31 08:58:35 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 08:58:35 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 08:58:35 | D | - sum error = [ 5236.9617, 5231.6921, 5248.8136, 5230.6724, 5223.6822] +25-08-31 08:58:35 | D | - best error = [ 5216.1720, 5216.1720, 5216.1720, 5216.1720, 5216.1720] +25-08-31 08:58:35 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 08:58:35 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 08:58:35 | D | - sum error = [ 5230.4912, 5240.0740, 5243.7773, 5244.0784] +25-08-31 08:58:35 | D | - best error = [ 5216.1720, 5216.1720, 5216.1720, 5216.1720] +25-08-31 08:58:35 | D | + error = 5216.1720 +25-08-31 08:58:35 | D | + scale = [min=0.8146, max=4.6555] +25-08-31 08:58:36 | D | - single_transformer_blocks.31.down_proj +25-08-31 08:58:36 | D | + w: sfp4_e2m1_all +25-08-31 08:58:36 | D | + x: sfp4_e2m1_all +25-08-31 08:58:36 | D | + y: None +25-08-31 08:58:36 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 08:58:36 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 08:58:38 | D | + x - AbsMax +25-08-31 08:58:38 | D | + x = [min=0.4961, max=15.1250] +25-08-31 08:58:38 | D | + w - AbsMax +25-08-31 08:58:38 | D | + w = [min=0.1064, max=1.3906] +25-08-31 08:58:38 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 08:58:40 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 09:00:44 | D | - x / w range = AbsMax / AbsMax +25-08-31 09:00:44 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 09:00:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:00:44 | D | - sum error = [ 5333.2255, 5308.5942, 5303.8181, 5310.6683, 5308.8957] +25-08-31 09:00:44 | D | - best error = [ 5333.2255, 5308.5942, 5303.8181, 5303.8181, 5303.8181] +25-08-31 09:00:44 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 09:00:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:00:44 | D | - sum error = [ 5315.6057, 5325.4035, 5337.2131, 5338.6641, 5349.2205] +25-08-31 09:00:44 | D | - best error = [ 5303.8181, 5303.8181, 5303.8181, 5303.8181, 5303.8181] +25-08-31 09:00:44 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 09:00:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:00:44 | D | - sum error = [ 5376.0491, 5379.5306, 5381.5057, 5401.2632, 5405.4787] +25-08-31 09:00:44 | D | - best error = [ 5303.8181, 5303.8181, 5303.8181, 5303.8181, 5303.8181] +25-08-31 09:00:44 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:00:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:00:44 | D | - sum error = [ 5433.4348, 5451.9927, 5483.3331, 5570.1697, 5550.3376] +25-08-31 09:00:44 | D | - best error = [ 5303.8181, 5303.8181, 5303.8181, 5303.8181, 5303.8181] +25-08-31 09:00:44 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 09:00:44 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 09:00:44 | D | - sum error = [ 5717.4958, 5643.5611, 5586.4151, 5539.6856, 5482.2286] +25-08-31 09:00:44 | D | - best error = [ 5303.8181, 5303.8181, 5303.8181, 5303.8181, 5303.8181] +25-08-31 09:00:44 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 09:00:44 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 09:00:44 | D | - sum error = [ 5445.1064, 5410.2442, 5392.9328, 5391.5838, 5396.4327] +25-08-31 09:00:44 | D | - best error = [ 5303.8181, 5303.8181, 5303.8181, 5303.8181, 5303.8181] +25-08-31 09:00:44 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 09:00:44 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 09:00:44 | D | - sum error = [ 5374.1878, 5368.5663, 5373.7566, 5386.8930, 5382.0264] +25-08-31 09:00:44 | D | - best error = [ 5303.8181, 5303.8181, 5303.8181, 5303.8181, 5303.8181] +25-08-31 09:00:44 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:00:44 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 09:00:44 | D | - sum error = [ 5422.4166, 5453.8159, 5478.6471, 5536.0616] +25-08-31 09:00:44 | D | - best error = [ 5303.8181, 5303.8181, 5303.8181, 5303.8181] +25-08-31 09:00:44 | D | + error = 5303.8181 +25-08-31 09:00:44 | D | + scale = [min=0.9323, max=1.3121] +25-08-31 09:01:02 | D | - Smoothing Diffusion Block single_transformer_blocks.32 +25-08-31 09:01:02 | D | - Skipping Module single_transformer_blocks.32.norm.linear +25-08-31 09:01:02 | D | - Smoothing Transformer Block single_transformer_blocks.32 +25-08-31 09:01:02 | D | - single_transformer_blocks.32.attn.qkv_proj + single_transformer_blocks.32.up_proj +25-08-31 09:01:02 | D | + w: sfp4_e2m1_all +25-08-31 09:01:02 | D | + x: sfp4_e2m1_all +25-08-31 09:01:02 | D | + y: None +25-08-31 09:01:02 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:01:02 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 09:01:03 | D | + x - AbsMax +25-08-31 09:01:03 | D | + x = [min=0.5352, max=23.5000] +25-08-31 09:01:03 | D | + w - AbsMax +25-08-31 09:01:03 | D | + w = [min=0.1562, max=1.4219] +25-08-31 09:01:03 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 09:01:04 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 09:03:13 | D | - x / w range = AbsMax / AbsMax +25-08-31 09:03:13 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 09:03:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:03:13 | D | - sum error = [128671.4947, 126600.9016, 124515.2773, 122305.1135, 120263.4803] +25-08-31 09:03:13 | D | - best error = [128671.4947, 126600.9016, 124515.2773, 122305.1135, 120263.4803] +25-08-31 09:03:13 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 09:03:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:03:13 | D | - sum error = [118809.7343, 117850.0395, 116789.9524, 115209.5196, 114654.5889] +25-08-31 09:03:13 | D | - best error = [118809.7343, 117850.0395, 116789.9524, 115209.5196, 114654.5889] +25-08-31 09:03:13 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 09:03:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:03:13 | D | - sum error = [113797.3092, 113114.2245, 112837.2273, 112841.0751, 112322.3521] +25-08-31 09:03:13 | D | - best error = [113797.3092, 113114.2245, 112837.2273, 112837.2273, 112322.3521] +25-08-31 09:03:13 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:03:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:03:13 | D | - sum error = [112280.4078, 112061.0531, 112464.7091, 112201.2546, 112792.6330] +25-08-31 09:03:13 | D | - best error = [112280.4078, 112061.0531, 112061.0531, 112061.0531, 112061.0531] +25-08-31 09:03:13 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 09:03:13 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 09:03:13 | D | - sum error = [171753.6260, 161131.8176, 151842.1951, 145238.7466, 139380.4072] +25-08-31 09:03:13 | D | - best error = [112061.0531, 112061.0531, 112061.0531, 112061.0531, 112061.0531] +25-08-31 09:03:13 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 09:03:13 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 09:03:13 | D | - sum error = [134270.9628, 130051.2480, 126168.0870, 123351.3377, 120664.6680] +25-08-31 09:03:13 | D | - best error = [112061.0531, 112061.0531, 112061.0531, 112061.0531, 112061.0531] +25-08-31 09:03:13 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 09:03:13 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 09:03:13 | D | - sum error = [117853.3065, 116857.3974, 115076.6273, 113913.5953, 113176.6670] +25-08-31 09:03:13 | D | - best error = [112061.0531, 112061.0531, 112061.0531, 112061.0531, 112061.0531] +25-08-31 09:03:13 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:03:13 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 09:03:13 | D | - sum error = [113231.7547, 112824.8347, 112354.3195, 112278.8301] +25-08-31 09:03:13 | D | - best error = [112061.0531, 112061.0531, 112061.0531, 112061.0531] +25-08-31 09:03:13 | D | + error = 112061.0531 +25-08-31 09:03:13 | D | + scale = [min=0.6064, max=12.4984] +25-08-31 09:03:13 | D | - single_transformer_blocks.32.attn.out_proj +25-08-31 09:03:13 | D | + w: sfp4_e2m1_all +25-08-31 09:03:13 | D | + x: sfp4_e2m1_all +25-08-31 09:03:13 | D | + y: None +25-08-31 09:03:13 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:03:13 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 09:03:13 | D | + x - AbsMax +25-08-31 09:03:13 | D | + x = [min=0.8125, max=11.5625] +25-08-31 09:03:13 | D | + w - AbsMax +25-08-31 09:03:13 | D | + w = [min=0.1055, max=0.4199] +25-08-31 09:03:13 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 09:03:14 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 09:04:13 | D | - x / w range = AbsMax / AbsMax +25-08-31 09:04:13 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 09:04:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:04:13 | D | - sum error = [ 8223.0486, 8204.8727, 8185.5900, 8131.9321, 8112.1659] +25-08-31 09:04:13 | D | - best error = [ 8223.0486, 8204.8727, 8185.5900, 8131.9321, 8112.1659] +25-08-31 09:04:13 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 09:04:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:04:13 | D | - sum error = [ 8073.3406, 8041.4434, 8002.9575, 7978.2732, 7928.9994] +25-08-31 09:04:13 | D | - best error = [ 8073.3406, 8041.4434, 8002.9575, 7978.2732, 7928.9994] +25-08-31 09:04:13 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 09:04:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:04:13 | D | - sum error = [ 7908.5716, 7878.7588, 7865.8378, 7819.8097, 7852.6589] +25-08-31 09:04:13 | D | - best error = [ 7908.5716, 7878.7588, 7865.8378, 7819.8097, 7819.8097] +25-08-31 09:04:13 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:04:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:04:13 | D | - sum error = [ 7855.8811, 7856.8307, 7838.7894, 7870.9146, 7882.8345] +25-08-31 09:04:13 | D | - best error = [ 7819.8097, 7819.8097, 7819.8097, 7819.8097, 7819.8097] +25-08-31 09:04:13 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 09:04:13 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 09:04:13 | D | - sum error = [ 8264.0910, 8226.5638, 8159.5769, 8118.0190, 8105.9556] +25-08-31 09:04:13 | D | - best error = [ 7819.8097, 7819.8097, 7819.8097, 7819.8097, 7819.8097] +25-08-31 09:04:13 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 09:04:13 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 09:04:13 | D | - sum error = [ 8072.6021, 8012.0265, 7948.8694, 7941.3974, 7886.3951] +25-08-31 09:04:13 | D | - best error = [ 7819.8097, 7819.8097, 7819.8097, 7819.8097, 7819.8097] +25-08-31 09:04:13 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 09:04:13 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 09:04:13 | D | - sum error = [ 7872.9623, 7903.1786, 7863.6897, 7850.1873, 7885.4101] +25-08-31 09:04:13 | D | - best error = [ 7819.8097, 7819.8097, 7819.8097, 7819.8097, 7819.8097] +25-08-31 09:04:13 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:04:13 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 09:04:13 | D | - sum error = [ 7867.2798, 7852.5835, 7873.0274, 7877.4770] +25-08-31 09:04:13 | D | - best error = [ 7819.8097, 7819.8097, 7819.8097, 7819.8097] +25-08-31 09:04:13 | D | + error = 7819.8097 +25-08-31 09:04:13 | D | + scale = [min=0.8737, max=4.9089] +25-08-31 09:04:13 | D | - single_transformer_blocks.32.down_proj +25-08-31 09:04:13 | D | + w: sfp4_e2m1_all +25-08-31 09:04:13 | D | + x: sfp4_e2m1_all +25-08-31 09:04:13 | D | + y: None +25-08-31 09:04:13 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:04:13 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 09:04:16 | D | + x - AbsMax +25-08-31 09:04:16 | D | + x = [min=0.2871, max=19.0000] +25-08-31 09:04:16 | D | + w - AbsMax +25-08-31 09:04:16 | D | + w = [min=0.1069, max=1.1719] +25-08-31 09:04:16 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 09:04:18 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 09:06:14 | D | - x / w range = AbsMax / AbsMax +25-08-31 09:06:14 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 09:06:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:06:14 | D | - sum error = [ 6583.9353, 6565.1672, 6555.3288, 6530.8640, 6527.5239] +25-08-31 09:06:14 | D | - best error = [ 6583.9353, 6565.1672, 6555.3288, 6530.8640, 6527.5239] +25-08-31 09:06:14 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 09:06:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:06:14 | D | - sum error = [ 6506.8813, 6499.9772, 6503.4065, 6503.1894, 6494.9907] +25-08-31 09:06:14 | D | - best error = [ 6506.8813, 6499.9772, 6499.9772, 6499.9772, 6494.9907] +25-08-31 09:06:14 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 09:06:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:06:14 | D | - sum error = [ 6511.2334, 6516.0758, 6537.1244, 6531.9375, 6557.1738] +25-08-31 09:06:14 | D | - best error = [ 6494.9907, 6494.9907, 6494.9907, 6494.9907, 6494.9907] +25-08-31 09:06:14 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:06:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:06:14 | D | - sum error = [ 6596.9493, 6620.9286, 6667.3653, 6715.1316, 6752.2909] +25-08-31 09:06:14 | D | - best error = [ 6494.9907, 6494.9907, 6494.9907, 6494.9907, 6494.9907] +25-08-31 09:06:14 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 09:06:14 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 09:06:14 | D | - sum error = [ 6946.7198, 6854.5667, 6777.8616, 6718.3245, 6642.4221] +25-08-31 09:06:14 | D | - best error = [ 6494.9907, 6494.9907, 6494.9907, 6494.9907, 6494.9907] +25-08-31 09:06:14 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 09:06:14 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 09:06:14 | D | - sum error = [ 6614.7490, 6577.3517, 6533.1328, 6518.7803, 6494.8900] +25-08-31 09:06:14 | D | - best error = [ 6494.9907, 6494.9907, 6494.9907, 6494.9907, 6494.8900] +25-08-31 09:06:14 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 09:06:14 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 09:06:14 | D | - sum error = [ 6485.8526, 6484.5348, 6493.4059, 6498.3306, 6531.6124] +25-08-31 09:06:14 | D | - best error = [ 6485.8526, 6484.5348, 6484.5348, 6484.5348, 6484.5348] +25-08-31 09:06:14 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:06:14 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 09:06:14 | D | - sum error = [ 6603.8780, 6612.7513, 6680.6784, 6743.5258] +25-08-31 09:06:14 | D | - best error = [ 6484.5348, 6484.5348, 6484.5348, 6484.5348] +25-08-31 09:06:14 | D | + error = 6484.5348 +25-08-31 09:06:14 | D | + scale = [min=0.5758, max=8.4614] +25-08-31 09:06:34 | D | - Smoothing Diffusion Block single_transformer_blocks.33 +25-08-31 09:06:34 | D | - Skipping Module single_transformer_blocks.33.norm.linear +25-08-31 09:06:34 | D | - Smoothing Transformer Block single_transformer_blocks.33 +25-08-31 09:06:34 | D | - single_transformer_blocks.33.attn.qkv_proj + single_transformer_blocks.33.up_proj +25-08-31 09:06:34 | D | + w: sfp4_e2m1_all +25-08-31 09:06:34 | D | + x: sfp4_e2m1_all +25-08-31 09:06:34 | D | + y: None +25-08-31 09:06:34 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:06:34 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 09:06:34 | D | + x - AbsMax +25-08-31 09:06:34 | D | + x = [min=0.2617, max=23.1250] +25-08-31 09:06:34 | D | + w - AbsMax +25-08-31 09:06:34 | D | + w = [min=0.1426, max=1.3828] +25-08-31 09:06:34 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 09:06:35 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 09:08:44 | D | - x / w range = AbsMax / AbsMax +25-08-31 09:08:44 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 09:08:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:08:44 | D | - sum error = [124813.4065, 124191.8186, 122723.9843, 120989.2028, 120797.5968] +25-08-31 09:08:44 | D | - best error = [124813.4065, 124191.8186, 122723.9843, 120989.2028, 120797.5968] +25-08-31 09:08:44 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 09:08:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:08:44 | D | - sum error = [119169.9291, 118697.2787, 117500.4575, 117300.8745, 117478.9124] +25-08-31 09:08:44 | D | - best error = [119169.9291, 118697.2787, 117500.4575, 117300.8745, 117300.8745] +25-08-31 09:08:44 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 09:08:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:08:44 | D | - sum error = [116680.6272, 116368.1529, 116419.8790, 116097.9890, 116038.0760] +25-08-31 09:08:44 | D | - best error = [116680.6272, 116368.1529, 116368.1529, 116097.9890, 116038.0760] +25-08-31 09:08:44 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:08:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:08:44 | D | - sum error = [115836.2165, 116410.8190, 117256.9123, 116990.1619, 117118.1901] +25-08-31 09:08:44 | D | - best error = [115836.2165, 115836.2165, 115836.2165, 115836.2165, 115836.2165] +25-08-31 09:08:44 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 09:08:44 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 09:08:44 | D | - sum error = [164857.2048, 156262.0388, 148984.2541, 144103.0032, 139048.4656] +25-08-31 09:08:44 | D | - best error = [115836.2165, 115836.2165, 115836.2165, 115836.2165, 115836.2165] +25-08-31 09:08:44 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 09:08:44 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 09:08:44 | D | - sum error = [133732.5599, 130555.7293, 126911.9395, 125233.2378, 122792.1436] +25-08-31 09:08:44 | D | - best error = [115836.2165, 115836.2165, 115836.2165, 115836.2165, 115836.2165] +25-08-31 09:08:44 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 09:08:44 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 09:08:44 | D | - sum error = [121048.1161, 119874.5798, 118711.3959, 118052.9053, 117562.9700] +25-08-31 09:08:44 | D | - best error = [115836.2165, 115836.2165, 115836.2165, 115836.2165, 115836.2165] +25-08-31 09:08:44 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:08:44 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 09:08:44 | D | - sum error = [116925.1275, 116898.0323, 117041.4425, 117517.1452] +25-08-31 09:08:44 | D | - best error = [115836.2165, 115836.2165, 115836.2165, 115836.2165] +25-08-31 09:08:44 | D | + error = 115836.2165 +25-08-31 09:08:44 | D | + scale = [min=0.3659, max=10.5454] +25-08-31 09:08:44 | D | - single_transformer_blocks.33.attn.out_proj +25-08-31 09:08:44 | D | + w: sfp4_e2m1_all +25-08-31 09:08:44 | D | + x: sfp4_e2m1_all +25-08-31 09:08:44 | D | + y: None +25-08-31 09:08:44 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:08:44 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 09:08:45 | D | + x - AbsMax +25-08-31 09:08:45 | D | + x = [min=0.6914, max=8.7500] +25-08-31 09:08:45 | D | + w - AbsMax +25-08-31 09:08:45 | D | + w = [min=0.1035, max=0.3379] +25-08-31 09:08:45 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 09:08:46 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 09:09:46 | D | - x / w range = AbsMax / AbsMax +25-08-31 09:09:46 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 09:09:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:09:46 | D | - sum error = [ 6741.2913, 6740.3409, 6722.6028, 6701.4389, 6694.9569] +25-08-31 09:09:46 | D | - best error = [ 6741.2913, 6740.3409, 6722.6028, 6701.4389, 6694.9569] +25-08-31 09:09:46 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 09:09:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:09:46 | D | - sum error = [ 6665.0548, 6700.0737, 6685.5097, 6676.5305, 6679.1464] +25-08-31 09:09:46 | D | - best error = [ 6665.0548, 6665.0548, 6665.0548, 6665.0548, 6665.0548] +25-08-31 09:09:46 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 09:09:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:09:46 | D | - sum error = [ 6684.6209, 6688.5774, 6678.8780, 6696.8793, 6691.6330] +25-08-31 09:09:46 | D | - best error = [ 6665.0548, 6665.0548, 6665.0548, 6665.0548, 6665.0548] +25-08-31 09:09:46 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:09:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:09:46 | D | - sum error = [ 6694.7430, 6708.2269, 6720.9535, 6720.0792, 6743.4923] +25-08-31 09:09:46 | D | - best error = [ 6665.0548, 6665.0548, 6665.0548, 6665.0548, 6665.0548] +25-08-31 09:09:46 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 09:09:46 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 09:09:46 | D | - sum error = [ 6766.5545, 6762.0884, 6707.3094, 6721.5748, 6719.4425] +25-08-31 09:09:46 | D | - best error = [ 6665.0548, 6665.0548, 6665.0548, 6665.0548, 6665.0548] +25-08-31 09:09:46 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 09:09:46 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 09:09:46 | D | - sum error = [ 6695.1881, 6661.8451, 6655.1781, 6656.3693, 6636.6721] +25-08-31 09:09:46 | D | - best error = [ 6665.0548, 6661.8451, 6655.1781, 6655.1781, 6636.6721] +25-08-31 09:09:46 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 09:09:46 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 09:09:46 | D | - sum error = [ 6632.1888, 6641.4206, 6658.0712, 6647.1931, 6667.8710] +25-08-31 09:09:46 | D | - best error = [ 6632.1888, 6632.1888, 6632.1888, 6632.1888, 6632.1888] +25-08-31 09:09:46 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:09:46 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 09:09:46 | D | - sum error = [ 6696.9996, 6697.1504, 6716.4913, 6735.2868] +25-08-31 09:09:46 | D | - best error = [ 6632.1888, 6632.1888, 6632.1888, 6632.1888] +25-08-31 09:09:46 | D | + error = 6632.1888 +25-08-31 09:09:46 | D | + scale = [min=1.4726, max=6.9909] +25-08-31 09:09:46 | D | - single_transformer_blocks.33.down_proj +25-08-31 09:09:46 | D | + w: sfp4_e2m1_all +25-08-31 09:09:46 | D | + x: sfp4_e2m1_all +25-08-31 09:09:46 | D | + y: None +25-08-31 09:09:46 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:09:46 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 09:09:48 | D | + x - AbsMax +25-08-31 09:09:48 | D | + x = [min=0.2949, max=11.7500] +25-08-31 09:09:48 | D | + w - AbsMax +25-08-31 09:09:48 | D | + w = [min=0.1206, max=1.5000] +25-08-31 09:09:48 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 09:09:50 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 09:11:41 | D | - x / w range = AbsMax / AbsMax +25-08-31 09:11:41 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 09:11:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:11:41 | D | - sum error = [ 6813.3316, 6800.4503, 6791.4269, 6785.1700, 6782.2742] +25-08-31 09:11:41 | D | - best error = [ 6813.3316, 6800.4503, 6791.4269, 6785.1700, 6782.2742] +25-08-31 09:11:41 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 09:11:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:11:41 | D | - sum error = [ 6763.5097, 6771.3050, 6755.9994, 6778.1927, 6773.6956] +25-08-31 09:11:41 | D | - best error = [ 6763.5097, 6763.5097, 6755.9994, 6755.9994, 6755.9994] +25-08-31 09:11:41 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 09:11:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:11:41 | D | - sum error = [ 6771.0218, 6802.6101, 6784.2521, 6810.2517, 6828.4083] +25-08-31 09:11:41 | D | - best error = [ 6755.9994, 6755.9994, 6755.9994, 6755.9994, 6755.9994] +25-08-31 09:11:41 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:11:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:11:41 | D | - sum error = [ 6846.5057, 6883.2864, 6928.0202, 6956.3168, 6990.7619] +25-08-31 09:11:41 | D | - best error = [ 6755.9994, 6755.9994, 6755.9994, 6755.9994, 6755.9994] +25-08-31 09:11:41 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 09:11:41 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 09:11:41 | D | - sum error = [ 7790.0149, 7616.5021, 7493.5073, 7359.1070, 7266.3449] +25-08-31 09:11:41 | D | - best error = [ 6755.9994, 6755.9994, 6755.9994, 6755.9994, 6755.9994] +25-08-31 09:11:41 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 09:11:41 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 09:11:41 | D | - sum error = [ 7180.6810, 7112.7933, 7026.6441, 6969.3756, 6926.4011] +25-08-31 09:11:41 | D | - best error = [ 6755.9994, 6755.9994, 6755.9994, 6755.9994, 6755.9994] +25-08-31 09:11:41 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 09:11:41 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 09:11:41 | D | - sum error = [ 6895.3193, 6872.7287, 6862.8437, 6847.2459, 6856.0187] +25-08-31 09:11:41 | D | - best error = [ 6755.9994, 6755.9994, 6755.9994, 6755.9994, 6755.9994] +25-08-31 09:11:41 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:11:41 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 09:11:41 | D | - sum error = [ 6867.5539, 6895.6114, 6942.9446, 6986.5336] +25-08-31 09:11:41 | D | - best error = [ 6755.9994, 6755.9994, 6755.9994, 6755.9994] +25-08-31 09:11:41 | D | + error = 6755.9994 +25-08-31 09:11:41 | D | + scale = [min=0.6522, max=2.3687] +25-08-31 09:11:59 | D | - Smoothing Diffusion Block single_transformer_blocks.34 +25-08-31 09:11:59 | D | - Skipping Module single_transformer_blocks.34.norm.linear +25-08-31 09:11:59 | D | - Smoothing Transformer Block single_transformer_blocks.34 +25-08-31 09:11:59 | D | - single_transformer_blocks.34.attn.qkv_proj + single_transformer_blocks.34.up_proj +25-08-31 09:11:59 | D | + w: sfp4_e2m1_all +25-08-31 09:11:59 | D | + x: sfp4_e2m1_all +25-08-31 09:11:59 | D | + y: None +25-08-31 09:11:59 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:11:59 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 09:12:00 | D | + x - AbsMax +25-08-31 09:12:00 | D | + x = [min=0.6484, max=23.3750] +25-08-31 09:12:00 | D | + w - AbsMax +25-08-31 09:12:00 | D | + w = [min=0.1523, max=2.0156] +25-08-31 09:12:00 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 09:12:01 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 09:14:08 | D | - x / w range = AbsMax / AbsMax +25-08-31 09:14:08 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 09:14:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:14:08 | D | - sum error = [148325.9270, 146768.0619, 144752.5933, 143627.3151, 141612.3113] +25-08-31 09:14:08 | D | - best error = [148325.9270, 146768.0619, 144752.5933, 143627.3151, 141612.3113] +25-08-31 09:14:08 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 09:14:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:14:08 | D | - sum error = [139928.6200, 139020.3088, 138817.0917, 137724.3940, 137036.0537] +25-08-31 09:14:08 | D | - best error = [139928.6200, 139020.3088, 138817.0917, 137724.3940, 137036.0537] +25-08-31 09:14:08 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 09:14:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:14:08 | D | - sum error = [136419.8020, 135692.7541, 135716.0858, 135255.8275, 135411.4550] +25-08-31 09:14:08 | D | - best error = [136419.8020, 135692.7541, 135692.7541, 135255.8275, 135255.8275] +25-08-31 09:14:08 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:14:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:14:08 | D | - sum error = [135273.5951, 136018.2727, 135832.6764, 135619.8508, 136760.4528] +25-08-31 09:14:08 | D | - best error = [135255.8275, 135255.8275, 135255.8275, 135255.8275, 135255.8275] +25-08-31 09:14:08 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 09:14:08 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 09:14:08 | D | - sum error = [211479.0883, 199788.3193, 190750.7519, 181008.1690, 173925.5609] +25-08-31 09:14:08 | D | - best error = [135255.8275, 135255.8275, 135255.8275, 135255.8275, 135255.8275] +25-08-31 09:14:08 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 09:14:08 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 09:14:08 | D | - sum error = [165281.9981, 160706.1665, 155174.7212, 151587.8606, 148950.8534] +25-08-31 09:14:08 | D | - best error = [135255.8275, 135255.8275, 135255.8275, 135255.8275, 135255.8275] +25-08-31 09:14:08 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 09:14:08 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 09:14:08 | D | - sum error = [145622.3139, 142026.4331, 141147.4259, 138791.6180, 138235.5481] +25-08-31 09:14:08 | D | - best error = [135255.8275, 135255.8275, 135255.8275, 135255.8275, 135255.8275] +25-08-31 09:14:08 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:14:08 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 09:14:08 | D | - sum error = [137850.9117, 137201.3350, 137050.3926, 136639.8651] +25-08-31 09:14:08 | D | - best error = [135255.8275, 135255.8275, 135255.8275, 135255.8275] +25-08-31 09:14:08 | D | + error = 135255.8275 +25-08-31 09:14:08 | D | + scale = [min=0.7546, max=7.7569] +25-08-31 09:14:08 | D | - single_transformer_blocks.34.attn.out_proj +25-08-31 09:14:08 | D | + w: sfp4_e2m1_all +25-08-31 09:14:08 | D | + x: sfp4_e2m1_all +25-08-31 09:14:08 | D | + y: None +25-08-31 09:14:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:14:08 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 09:14:09 | D | + x - AbsMax +25-08-31 09:14:09 | D | + x = [min=0.5391, max=14.8125] +25-08-31 09:14:09 | D | + w - AbsMax +25-08-31 09:14:09 | D | + w = [min=0.0933, max=0.3691] +25-08-31 09:14:09 | D | + finished resetting calibrator, ram usage: 14.3 +25-08-31 09:14:10 | D | + finished calculating the original outputs, ram usage: 14.3 +25-08-31 09:15:08 | D | - x / w range = AbsMax / AbsMax +25-08-31 09:15:08 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 09:15:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:15:08 | D | - sum error = [ 9818.9775, 9776.5548, 9759.1639, 9704.3701, 9678.6454] +25-08-31 09:15:08 | D | - best error = [ 9818.9775, 9776.5548, 9759.1639, 9704.3701, 9678.6454] +25-08-31 09:15:08 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 09:15:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:15:08 | D | - sum error = [ 9652.6589, 9605.0458, 9562.7034, 9571.6799, 9537.8304] +25-08-31 09:15:08 | D | - best error = [ 9652.6589, 9605.0458, 9562.7034, 9562.7034, 9537.8304] +25-08-31 09:15:08 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 09:15:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:15:08 | D | - sum error = [ 9508.5381, 9486.4134, 9461.4781, 9500.5600, 9499.8981] +25-08-31 09:15:08 | D | - best error = [ 9508.5381, 9486.4134, 9461.4781, 9461.4781, 9461.4781] +25-08-31 09:15:08 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:15:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:15:08 | D | - sum error = [ 9509.1346, 9527.5457, 9559.0891, 9614.8776, 9648.8262] +25-08-31 09:15:08 | D | - best error = [ 9461.4781, 9461.4781, 9461.4781, 9461.4781, 9461.4781] +25-08-31 09:15:08 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 09:15:08 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 09:15:08 | D | - sum error = [ 9828.0110, 9749.3302, 9720.0181, 9682.7785, 9641.5849] +25-08-31 09:15:08 | D | - best error = [ 9461.4781, 9461.4781, 9461.4781, 9461.4781, 9461.4781] +25-08-31 09:15:08 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 09:15:08 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 09:15:08 | D | - sum error = [ 9597.6793, 9557.1255, 9511.4188, 9501.1166, 9426.6512] +25-08-31 09:15:08 | D | - best error = [ 9461.4781, 9461.4781, 9461.4781, 9461.4781, 9426.6512] +25-08-31 09:15:08 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 09:15:08 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 09:15:08 | D | - sum error = [ 9456.0897, 9401.1667, 9445.5607, 9427.8061, 9456.9048] +25-08-31 09:15:08 | D | - best error = [ 9426.6512, 9401.1667, 9401.1667, 9401.1667, 9401.1667] +25-08-31 09:15:08 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:15:08 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 09:15:08 | D | - sum error = [ 9470.3880, 9501.9577, 9593.4248, 9628.0698] +25-08-31 09:15:08 | D | - best error = [ 9401.1667, 9401.1667, 9401.1667, 9401.1667] +25-08-31 09:15:08 | D | + error = 9401.1667 +25-08-31 09:15:08 | D | + scale = [min=1.5003, max=9.4261] +25-08-31 09:15:09 | D | - single_transformer_blocks.34.down_proj +25-08-31 09:15:09 | D | + w: sfp4_e2m1_all +25-08-31 09:15:09 | D | + x: sfp4_e2m1_all +25-08-31 09:15:09 | D | + y: None +25-08-31 09:15:09 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:15:09 | D | + finished parsing calibration arguments, ram usage: 14.3 +25-08-31 09:15:12 | D | + x - AbsMax +25-08-31 09:15:12 | D | + x = [min=0.1699, max=19.2500] +25-08-31 09:15:12 | D | + w - AbsMax +25-08-31 09:15:12 | D | + w = [min=0.1050, max=3.1094] +25-08-31 09:15:12 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 09:15:14 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 09:17:15 | D | - x / w range = AbsMax / AbsMax +25-08-31 09:17:15 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 09:17:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:17:15 | D | - sum error = [ 8416.1820, 8369.6284, 8319.7562, 8253.1788, 8215.3456] +25-08-31 09:17:15 | D | - best error = [ 8416.1820, 8369.6284, 8319.7562, 8253.1788, 8215.3456] +25-08-31 09:17:15 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 09:17:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:17:15 | D | - sum error = [ 8179.2352, 8152.5414, 8239.5670, 8132.1889, 8123.1708] +25-08-31 09:17:15 | D | - best error = [ 8179.2352, 8152.5414, 8152.5414, 8132.1889, 8123.1708] +25-08-31 09:17:15 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 09:17:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:17:15 | D | - sum error = [ 8162.2906, 8146.2936, 8161.2979, 8171.1184, 8215.9211] +25-08-31 09:17:15 | D | - best error = [ 8123.1708, 8123.1708, 8123.1708, 8123.1708, 8123.1708] +25-08-31 09:17:15 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:17:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:17:15 | D | - sum error = [ 8221.8392, 8277.6746, 8332.5276, 8379.9050, 8421.7585] +25-08-31 09:17:15 | D | - best error = [ 8123.1708, 8123.1708, 8123.1708, 8123.1708, 8123.1708] +25-08-31 09:17:15 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 09:17:15 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 09:17:15 | D | - sum error = [ 9127.1735, 8839.0769, 8712.4100, 8527.2195, 8415.0912] +25-08-31 09:17:15 | D | - best error = [ 8123.1708, 8123.1708, 8123.1708, 8123.1708, 8123.1708] +25-08-31 09:17:15 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 09:17:15 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 09:17:15 | D | - sum error = [ 8288.7191, 8214.4405, 8192.4765, 8114.0254, 8098.1341] +25-08-31 09:17:15 | D | - best error = [ 8123.1708, 8123.1708, 8123.1708, 8114.0254, 8098.1341] +25-08-31 09:17:15 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 09:17:15 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 09:17:15 | D | - sum error = [ 8070.6540, 8058.1065, 8053.4251, 8085.4741, 8109.5026] +25-08-31 09:17:15 | D | - best error = [ 8070.6540, 8058.1065, 8053.4251, 8053.4251, 8053.4251] +25-08-31 09:17:15 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:17:15 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 09:17:15 | D | - sum error = [ 8168.0162, 8233.0150, 8306.9932, 8391.4471] +25-08-31 09:17:15 | D | - best error = [ 8053.4251, 8053.4251, 8053.4251, 8053.4251] +25-08-31 09:17:15 | D | + error = 8053.4251 +25-08-31 09:17:15 | D | + scale = [min=0.3037, max=9.7740] +25-08-31 09:17:33 | D | - Smoothing Diffusion Block single_transformer_blocks.35 +25-08-31 09:17:33 | D | - Skipping Module single_transformer_blocks.35.norm.linear +25-08-31 09:17:33 | D | - Smoothing Transformer Block single_transformer_blocks.35 +25-08-31 09:17:33 | D | - single_transformer_blocks.35.attn.qkv_proj + single_transformer_blocks.35.up_proj +25-08-31 09:17:33 | D | + w: sfp4_e2m1_all +25-08-31 09:17:33 | D | + x: sfp4_e2m1_all +25-08-31 09:17:33 | D | + y: None +25-08-31 09:17:33 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:17:33 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 09:17:34 | D | + x - AbsMax +25-08-31 09:17:34 | D | + x = [min=0.4453, max=25.3750] +25-08-31 09:17:34 | D | + w - AbsMax +25-08-31 09:17:34 | D | + w = [min=0.1514, max=0.8828] +25-08-31 09:17:34 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 09:17:35 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 09:19:43 | D | - x / w range = AbsMax / AbsMax +25-08-31 09:19:43 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 09:19:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:19:43 | D | - sum error = [174886.7248, 169037.0915, 166490.4616, 163542.3171, 160788.2217] +25-08-31 09:19:43 | D | - best error = [174886.7248, 169037.0915, 166490.4616, 163542.3171, 160788.2217] +25-08-31 09:19:43 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 09:19:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:19:43 | D | - sum error = [157502.6672, 155524.9184, 153350.9381, 151916.6289, 150756.5068] +25-08-31 09:19:43 | D | - best error = [157502.6672, 155524.9184, 153350.9381, 151916.6289, 150756.5068] +25-08-31 09:19:43 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 09:19:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:19:43 | D | - sum error = [150064.7429, 149322.5954, 149286.6365, 148732.9845, 148805.8562] +25-08-31 09:19:43 | D | - best error = [150064.7429, 149322.5954, 149286.6365, 148732.9845, 148732.9845] +25-08-31 09:19:43 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:19:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:19:43 | D | - sum error = [148395.6036, 149845.5004, 149061.7088, 149381.2578, 150594.6655] +25-08-31 09:19:43 | D | - best error = [148395.6036, 148395.6036, 148395.6036, 148395.6036, 148395.6036] +25-08-31 09:19:43 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 09:19:43 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 09:19:43 | D | - sum error = [235421.8926, 223607.9861, 210883.5156, 199191.9266, 190122.3387] +25-08-31 09:19:43 | D | - best error = [148395.6036, 148395.6036, 148395.6036, 148395.6036, 148395.6036] +25-08-31 09:19:43 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 09:19:43 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 09:19:43 | D | - sum error = [182477.9960, 175886.0024, 169728.0543, 164985.7501, 160801.8563] +25-08-31 09:19:43 | D | - best error = [148395.6036, 148395.6036, 148395.6036, 148395.6036, 148395.6036] +25-08-31 09:19:43 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 09:19:43 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 09:19:43 | D | - sum error = [157992.5005, 154051.2040, 151936.3976, 150942.3142, 149450.3602] +25-08-31 09:19:43 | D | - best error = [148395.6036, 148395.6036, 148395.6036, 148395.6036, 148395.6036] +25-08-31 09:19:43 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:19:43 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 09:19:43 | D | - sum error = [149005.9237, 148816.7216, 149830.0759, 149656.6139] +25-08-31 09:19:43 | D | - best error = [148395.6036, 148395.6036, 148395.6036, 148395.6036] +25-08-31 09:19:43 | D | + error = 148395.6036 +25-08-31 09:19:43 | D | + scale = [min=0.5451, max=11.3059] +25-08-31 09:19:43 | D | - single_transformer_blocks.35.attn.out_proj +25-08-31 09:19:43 | D | + w: sfp4_e2m1_all +25-08-31 09:19:43 | D | + x: sfp4_e2m1_all +25-08-31 09:19:43 | D | + y: None +25-08-31 09:19:43 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:19:43 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 09:19:44 | D | + x - AbsMax +25-08-31 09:19:44 | D | + x = [min=0.7617, max=13.3125] +25-08-31 09:19:44 | D | + w - AbsMax +25-08-31 09:19:44 | D | + w = [min=0.0972, max=0.5352] +25-08-31 09:19:44 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 09:19:45 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 09:20:46 | D | - x / w range = AbsMax / AbsMax +25-08-31 09:20:46 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 09:20:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:20:46 | D | - sum error = [10153.4447, 10101.0427, 10075.6757, 10060.2724, 9999.8184] +25-08-31 09:20:46 | D | - best error = [10153.4447, 10101.0427, 10075.6757, 10060.2724, 9999.8184] +25-08-31 09:20:46 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 09:20:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:20:46 | D | - sum error = [ 9963.2509, 9933.2464, 9864.3917, 9871.0409, 9799.6277] +25-08-31 09:20:46 | D | - best error = [ 9963.2509, 9933.2464, 9864.3917, 9864.3917, 9799.6277] +25-08-31 09:20:46 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 09:20:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:20:46 | D | - sum error = [ 9778.3148, 9738.5194, 9718.6814, 9711.9395, 9697.7012] +25-08-31 09:20:46 | D | - best error = [ 9778.3148, 9738.5194, 9718.6814, 9711.9395, 9697.7012] +25-08-31 09:20:46 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:20:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:20:46 | D | - sum error = [ 9690.4943, 9711.4357, 9757.6448, 9762.5755, 9756.4297] +25-08-31 09:20:46 | D | - best error = [ 9690.4943, 9690.4943, 9690.4943, 9690.4943, 9690.4943] +25-08-31 09:20:46 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 09:20:46 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 09:20:46 | D | - sum error = [10058.7192, 10023.8272, 9970.5205, 9899.6477, 9852.8932] +25-08-31 09:20:46 | D | - best error = [ 9690.4943, 9690.4943, 9690.4943, 9690.4943, 9690.4943] +25-08-31 09:20:46 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 09:20:46 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 09:20:46 | D | - sum error = [ 9834.3167, 9770.4469, 9765.9089, 9719.7321, 9684.1132] +25-08-31 09:20:46 | D | - best error = [ 9690.4943, 9690.4943, 9690.4943, 9690.4943, 9684.1132] +25-08-31 09:20:46 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 09:20:46 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 09:20:46 | D | - sum error = [ 9651.8993, 9638.9900, 9659.3615, 9637.8811, 9661.3279] +25-08-31 09:20:46 | D | - best error = [ 9651.8993, 9638.9900, 9638.9900, 9637.8811, 9637.8811] +25-08-31 09:20:46 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:20:46 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 09:20:46 | D | - sum error = [ 9671.2766, 9683.1057, 9704.7341, 9741.9037] +25-08-31 09:20:46 | D | - best error = [ 9637.8811, 9637.8811, 9637.8811, 9637.8811] +25-08-31 09:20:46 | D | + error = 9637.8811 +25-08-31 09:20:46 | D | + scale = [min=1.1806, max=9.7529] +25-08-31 09:20:47 | D | - single_transformer_blocks.35.down_proj +25-08-31 09:20:47 | D | + w: sfp4_e2m1_all +25-08-31 09:20:47 | D | + x: sfp4_e2m1_all +25-08-31 09:20:47 | D | + y: None +25-08-31 09:20:47 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:20:47 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 09:20:49 | D | + x - AbsMax +25-08-31 09:20:49 | D | + x = [min=0.2734, max=18.7500] +25-08-31 09:20:49 | D | + w - AbsMax +25-08-31 09:20:49 | D | + w = [min=0.1191, max=3.0625] +25-08-31 09:20:49 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 09:20:51 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 09:22:48 | D | - x / w range = AbsMax / AbsMax +25-08-31 09:22:48 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 09:22:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:22:48 | D | - sum error = [ 9140.9028, 9090.4693, 9032.8985, 8926.6249, 8883.1445] +25-08-31 09:22:48 | D | - best error = [ 9140.9028, 9090.4693, 9032.8985, 8926.6249, 8883.1445] +25-08-31 09:22:48 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 09:22:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:22:48 | D | - sum error = [ 8825.3353, 8768.8106, 8752.9739, 8698.8303, 8701.8196] +25-08-31 09:22:48 | D | - best error = [ 8825.3353, 8768.8106, 8752.9739, 8698.8303, 8698.8303] +25-08-31 09:22:48 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 09:22:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:22:48 | D | - sum error = [ 8680.3364, 8683.2736, 8687.7744, 9074.8289, 8760.6182] +25-08-31 09:22:48 | D | - best error = [ 8680.3364, 8680.3364, 8680.3364, 8680.3364, 8680.3364] +25-08-31 09:22:48 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:22:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:22:48 | D | - sum error = [ 8809.9339, 8881.2873, 8922.1769, 9002.3472, 9081.7575] +25-08-31 09:22:48 | D | - best error = [ 8680.3364, 8680.3364, 8680.3364, 8680.3364, 8680.3364] +25-08-31 09:22:48 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 09:22:48 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 09:22:48 | D | - sum error = [ 9761.8904, 9560.5071, 9414.0690, 9282.6638, 9155.6059] +25-08-31 09:22:48 | D | - best error = [ 8680.3364, 8680.3364, 8680.3364, 8680.3364, 8680.3364] +25-08-31 09:22:48 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 09:22:48 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 09:22:48 | D | - sum error = [ 9090.0851, 8942.0177, 8858.1705, 8804.4684, 8758.9643] +25-08-31 09:22:48 | D | - best error = [ 8680.3364, 8680.3364, 8680.3364, 8680.3364, 8680.3364] +25-08-31 09:22:48 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 09:22:48 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 09:22:48 | D | - sum error = [ 8689.4166, 8893.5887, 8703.7390, 8704.7389, 8742.3850] +25-08-31 09:22:48 | D | - best error = [ 8680.3364, 8680.3364, 8680.3364, 8680.3364, 8680.3364] +25-08-31 09:22:48 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:22:48 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 09:22:48 | D | - sum error = [ 8795.6007, 8850.2185, 8944.6033, 9069.1503] +25-08-31 09:22:48 | D | - best error = [ 8680.3364, 8680.3364, 8680.3364, 8680.3364] +25-08-31 09:22:48 | D | + error = 8680.3364 +25-08-31 09:22:48 | D | + scale = [min=0.5229, max=4.3301] +25-08-31 09:23:06 | D | - Smoothing Diffusion Block single_transformer_blocks.36 +25-08-31 09:23:06 | D | - Skipping Module single_transformer_blocks.36.norm.linear +25-08-31 09:23:06 | D | - Smoothing Transformer Block single_transformer_blocks.36 +25-08-31 09:23:06 | D | - single_transformer_blocks.36.attn.qkv_proj + single_transformer_blocks.36.up_proj +25-08-31 09:23:06 | D | + w: sfp4_e2m1_all +25-08-31 09:23:06 | D | + x: sfp4_e2m1_all +25-08-31 09:23:06 | D | + y: None +25-08-31 09:23:06 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:23:06 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 09:23:06 | D | + x - AbsMax +25-08-31 09:23:06 | D | + x = [min=0.4531, max=27.8750] +25-08-31 09:23:06 | D | + w - AbsMax +25-08-31 09:23:06 | D | + w = [min=0.1338, max=0.7969] +25-08-31 09:23:06 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 09:23:07 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 09:25:19 | D | - x / w range = AbsMax / AbsMax +25-08-31 09:25:19 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 09:25:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:25:19 | D | - sum error = [82368.3699, 79636.5761, 77327.3064, 75315.3430, 72583.1954] +25-08-31 09:25:19 | D | - best error = [82368.3699, 79636.5761, 77327.3064, 75315.3430, 72583.1954] +25-08-31 09:25:19 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 09:25:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:25:19 | D | - sum error = [71068.1602, 69580.9519, 68145.3543, 67097.2090, 66013.3359] +25-08-31 09:25:19 | D | - best error = [71068.1602, 69580.9519, 68145.3543, 67097.2090, 66013.3359] +25-08-31 09:25:19 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 09:25:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:25:19 | D | - sum error = [65125.6332, 64992.4609, 63794.2573, 63722.0344, 63495.9793] +25-08-31 09:25:19 | D | - best error = [65125.6332, 64992.4609, 63794.2573, 63722.0344, 63495.9793] +25-08-31 09:25:19 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:25:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:25:19 | D | - sum error = [64239.3445, 63931.6835, 64076.1028, 64762.9800, 65347.4632] +25-08-31 09:25:19 | D | - best error = [63495.9793, 63495.9793, 63495.9793, 63495.9793, 63495.9793] +25-08-31 09:25:19 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 09:25:19 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 09:25:19 | D | - sum error = [113728.6663, 107751.2133, 100840.9821, 94800.4307, 89040.7720] +25-08-31 09:25:19 | D | - best error = [63495.9793, 63495.9793, 63495.9793, 63495.9793, 63495.9793] +25-08-31 09:25:19 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 09:25:19 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 09:25:19 | D | - sum error = [85398.6417, 80111.0110, 76075.2268, 73360.2704, 70853.4740] +25-08-31 09:25:19 | D | - best error = [63495.9793, 63495.9793, 63495.9793, 63495.9793, 63495.9793] +25-08-31 09:25:19 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 09:25:19 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 09:25:19 | D | - sum error = [68599.8204, 67225.2943, 66044.8333, 65168.9919, 64588.8114] +25-08-31 09:25:19 | D | - best error = [63495.9793, 63495.9793, 63495.9793, 63495.9793, 63495.9793] +25-08-31 09:25:19 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:25:19 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 09:25:19 | D | - sum error = [64332.4525, 64370.7640, 64379.3159, 65447.9271] +25-08-31 09:25:19 | D | - best error = [63495.9793, 63495.9793, 63495.9793, 63495.9793] +25-08-31 09:25:19 | D | + error = 63495.9793 +25-08-31 09:25:19 | D | + scale = [min=0.5746, max=10.2719] +25-08-31 09:25:19 | D | - single_transformer_blocks.36.attn.out_proj +25-08-31 09:25:19 | D | + w: sfp4_e2m1_all +25-08-31 09:25:19 | D | + x: sfp4_e2m1_all +25-08-31 09:25:19 | D | + y: None +25-08-31 09:25:19 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:25:19 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 09:25:20 | D | + x - AbsMax +25-08-31 09:25:20 | D | + x = [min=0.7930, max=16.7500] +25-08-31 09:25:20 | D | + w - AbsMax +25-08-31 09:25:20 | D | + w = [min=0.1060, max=0.6562] +25-08-31 09:25:20 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 09:25:21 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 09:26:22 | D | - x / w range = AbsMax / AbsMax +25-08-31 09:26:22 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 09:26:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:26:22 | D | - sum error = [ 9783.5793, 9724.1131, 9673.6442, 9587.2165, 9531.8418] +25-08-31 09:26:22 | D | - best error = [ 9783.5793, 9724.1131, 9673.6442, 9587.2165, 9531.8418] +25-08-31 09:26:22 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 09:26:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:26:22 | D | - sum error = [ 9461.2705, 9383.2421, 9360.5308, 9300.2598, 9245.7402] +25-08-31 09:26:22 | D | - best error = [ 9461.2705, 9383.2421, 9360.5308, 9300.2598, 9245.7402] +25-08-31 09:26:22 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 09:26:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:26:22 | D | - sum error = [ 9219.6308, 9199.4713, 9167.0122, 9197.5971, 9180.7303] +25-08-31 09:26:22 | D | - best error = [ 9219.6308, 9199.4713, 9167.0122, 9167.0122, 9167.0122] +25-08-31 09:26:22 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:26:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:26:22 | D | - sum error = [ 9189.3369, 9206.9296, 9258.1683, 9273.9308, 9354.7284] +25-08-31 09:26:22 | D | - best error = [ 9167.0122, 9167.0122, 9167.0122, 9167.0122, 9167.0122] +25-08-31 09:26:22 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 09:26:22 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 09:26:22 | D | - sum error = [10111.3679, 9974.1634, 9859.6608, 9766.8378, 9698.1516] +25-08-31 09:26:22 | D | - best error = [ 9167.0122, 9167.0122, 9167.0122, 9167.0122, 9167.0122] +25-08-31 09:26:22 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 09:26:22 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 09:26:22 | D | - sum error = [ 9551.2194, 9495.1230, 9399.4515, 9333.5575, 9303.0085] +25-08-31 09:26:22 | D | - best error = [ 9167.0122, 9167.0122, 9167.0122, 9167.0122, 9167.0122] +25-08-31 09:26:22 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 09:26:22 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 09:26:22 | D | - sum error = [ 9243.5249, 9224.4107, 9255.4749, 9259.6242, 9217.7329] +25-08-31 09:26:22 | D | - best error = [ 9167.0122, 9167.0122, 9167.0122, 9167.0122, 9167.0122] +25-08-31 09:26:22 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:26:22 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 09:26:22 | D | - sum error = [ 9231.8579, 9261.8568, 9260.2251, 9331.6605] +25-08-31 09:26:22 | D | - best error = [ 9167.0122, 9167.0122, 9167.0122, 9167.0122] +25-08-31 09:26:22 | D | + error = 9167.0122 +25-08-31 09:26:22 | D | + scale = [min=0.8701, max=5.4251] +25-08-31 09:26:23 | D | - single_transformer_blocks.36.down_proj +25-08-31 09:26:23 | D | + w: sfp4_e2m1_all +25-08-31 09:26:23 | D | + x: sfp4_e2m1_all +25-08-31 09:26:23 | D | + y: None +25-08-31 09:26:23 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:26:23 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 09:26:24 | D | + x - AbsMax +25-08-31 09:26:24 | D | + x = [min=0.6836, max=17.0000] +25-08-31 09:26:24 | D | + w - AbsMax +25-08-31 09:26:24 | D | + w = [min=0.1279, max=1.1016] +25-08-31 09:26:24 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 09:26:26 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 09:28:24 | D | - x / w range = AbsMax / AbsMax +25-08-31 09:28:24 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 09:28:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:28:24 | D | - sum error = [10933.9629, 10855.4864, 10809.5143, 10768.7251, 10687.0561] +25-08-31 09:28:24 | D | - best error = [10933.9629, 10855.4864, 10809.5143, 10768.7251, 10687.0561] +25-08-31 09:28:24 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 09:28:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:28:24 | D | - sum error = [10685.0175, 10656.4471, 10664.8053, 10662.5753, 10653.4351] +25-08-31 09:28:24 | D | - best error = [10685.0175, 10656.4471, 10656.4471, 10656.4471, 10653.4351] +25-08-31 09:28:24 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 09:28:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:28:24 | D | - sum error = [10711.0900, 10821.3530, 10756.8375, 10793.7671, 10839.6321] +25-08-31 09:28:24 | D | - best error = [10653.4351, 10653.4351, 10653.4351, 10653.4351, 10653.4351] +25-08-31 09:28:24 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:28:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:28:24 | D | - sum error = [10888.1098, 10937.5734, 11061.8328, 11145.9733, 11206.1926] +25-08-31 09:28:24 | D | - best error = [10653.4351, 10653.4351, 10653.4351, 10653.4351, 10653.4351] +25-08-31 09:28:24 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 09:28:24 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 09:28:24 | D | - sum error = [12291.5848, 12069.0228, 11890.1398, 11739.2811, 11616.9876] +25-08-31 09:28:24 | D | - best error = [10653.4351, 10653.4351, 10653.4351, 10653.4351, 10653.4351] +25-08-31 09:28:24 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 09:28:24 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 09:28:24 | D | - sum error = [11461.6446, 11361.0711, 11257.2160, 11153.2636, 11096.5270] +25-08-31 09:28:24 | D | - best error = [10653.4351, 10653.4351, 10653.4351, 10653.4351, 10653.4351] +25-08-31 09:28:24 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 09:28:24 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 09:28:24 | D | - sum error = [11035.5049, 11024.7590, 10991.9541, 10979.1548, 11010.2930] +25-08-31 09:28:24 | D | - best error = [10653.4351, 10653.4351, 10653.4351, 10653.4351, 10653.4351] +25-08-31 09:28:24 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:28:24 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 09:28:24 | D | - sum error = [11002.6906, 11082.9880, 11114.9737, 11226.4367] +25-08-31 09:28:24 | D | - best error = [10653.4351, 10653.4351, 10653.4351, 10653.4351] +25-08-31 09:28:24 | D | + error = 10653.4351 +25-08-31 09:28:24 | D | + scale = [min=0.8427, max=3.5785] +25-08-31 09:28:44 | D | - Smoothing Diffusion Block single_transformer_blocks.37 +25-08-31 09:28:44 | D | - Skipping Module single_transformer_blocks.37.norm.linear +25-08-31 09:28:44 | D | - Smoothing Transformer Block single_transformer_blocks.37 +25-08-31 09:28:44 | D | - single_transformer_blocks.37.attn.qkv_proj + single_transformer_blocks.37.up_proj +25-08-31 09:28:44 | D | + w: sfp4_e2m1_all +25-08-31 09:28:44 | D | + x: sfp4_e2m1_all +25-08-31 09:28:44 | D | + y: None +25-08-31 09:28:44 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:28:44 | D | + finished parsing calibration arguments, ram usage: 14.0 +25-08-31 09:28:44 | D | + x - AbsMax +25-08-31 09:28:44 | D | + x = [min=0.2139, max=29.2500] +25-08-31 09:28:44 | D | + w - AbsMax +25-08-31 09:28:44 | D | + w = [min=0.1196, max=0.6016] +25-08-31 09:28:44 | D | + finished resetting calibrator, ram usage: 14.0 +25-08-31 09:28:46 | D | + finished calculating the original outputs, ram usage: 14.0 +25-08-31 09:30:55 | D | - x / w range = AbsMax / AbsMax +25-08-31 09:30:55 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 09:30:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:30:55 | D | - sum error = [28363.4469, 27581.7876, 26704.3715, 25572.1499, 24441.6615] +25-08-31 09:30:55 | D | - best error = [28363.4469, 27581.7876, 26704.3715, 25572.1499, 24441.6615] +25-08-31 09:30:55 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 09:30:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:30:55 | D | - sum error = [23239.8637, 22670.5187, 21493.9433, 21220.2756, 20971.4694] +25-08-31 09:30:55 | D | - best error = [23239.8637, 22670.5187, 21493.9433, 21220.2756, 20971.4694] +25-08-31 09:30:55 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 09:30:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:30:55 | D | - sum error = [20922.8220, 20930.9990, 20550.6843, 20575.8103, 19676.8038] +25-08-31 09:30:55 | D | - best error = [20922.8220, 20922.8220, 20550.6843, 20550.6843, 19676.8038] +25-08-31 09:30:55 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:30:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:30:55 | D | - sum error = [19668.3006, 19549.8395, 19809.5831, 20879.2781, 20450.1986] +25-08-31 09:30:55 | D | - best error = [19668.3006, 19549.8395, 19549.8395, 19549.8395, 19549.8395] +25-08-31 09:30:55 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 09:30:55 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 09:30:55 | D | - sum error = [31613.4068, 28926.8163, 28380.9040, 26964.9883, 26431.0726] +25-08-31 09:30:55 | D | - best error = [19549.8395, 19549.8395, 19549.8395, 19549.8395, 19549.8395] +25-08-31 09:30:55 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 09:30:55 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 09:30:55 | D | - sum error = [24083.5612, 23217.3218, 22729.4523, 21933.8402, 20999.6221] +25-08-31 09:30:55 | D | - best error = [19549.8395, 19549.8395, 19549.8395, 19549.8395, 19549.8395] +25-08-31 09:30:55 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 09:30:55 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 09:30:55 | D | - sum error = [21083.2992, 20238.0015, 20604.7125, 20230.2221, 20119.1729] +25-08-31 09:30:55 | D | - best error = [19549.8395, 19549.8395, 19549.8395, 19549.8395, 19549.8395] +25-08-31 09:30:55 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:30:55 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 09:30:55 | D | - sum error = [20265.4046, 20604.3141, 20111.9356, 20185.9523] +25-08-31 09:30:55 | D | - best error = [19549.8395, 19549.8395, 19549.8395, 19549.8395] +25-08-31 09:30:55 | D | + error = 19549.8395 +25-08-31 09:30:55 | D | + scale = [min=0.2911, max=14.8902] +25-08-31 09:30:55 | D | - single_transformer_blocks.37.attn.out_proj +25-08-31 09:30:55 | D | + w: sfp4_e2m1_all +25-08-31 09:30:55 | D | + x: sfp4_e2m1_all +25-08-31 09:30:55 | D | + y: None +25-08-31 09:30:55 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:30:55 | D | + finished parsing calibration arguments, ram usage: 14.0 +25-08-31 09:30:56 | D | + x - AbsMax +25-08-31 09:30:56 | D | + x = [min=0.9492, max=11.9375] +25-08-31 09:30:56 | D | + w - AbsMax +25-08-31 09:30:56 | D | + w = [min=0.0613, max=0.8867] +25-08-31 09:30:56 | D | + finished resetting calibrator, ram usage: 14.0 +25-08-31 09:30:57 | D | + finished calculating the original outputs, ram usage: 14.0 +25-08-31 09:31:55 | D | - x / w range = AbsMax / AbsMax +25-08-31 09:31:55 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 09:31:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:31:55 | D | - sum error = [ 7389.9383, 7415.2022, 7416.7777, 7279.0851, 7185.7748] +25-08-31 09:31:55 | D | - best error = [ 7389.9383, 7389.9383, 7389.9383, 7279.0851, 7185.7748] +25-08-31 09:31:55 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 09:31:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:31:55 | D | - sum error = [ 7081.5710, 7075.6942, 7100.0687, 7130.9736, 7176.2720] +25-08-31 09:31:55 | D | - best error = [ 7081.5710, 7075.6942, 7075.6942, 7075.6942, 7075.6942] +25-08-31 09:31:55 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 09:31:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:31:55 | D | - sum error = [ 7243.2225, 7310.9488, 7421.9906, 7371.4834, 7272.6644] +25-08-31 09:31:55 | D | - best error = [ 7075.6942, 7075.6942, 7075.6942, 7075.6942, 7075.6942] +25-08-31 09:31:55 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:31:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:31:55 | D | - sum error = [ 7125.7407, 7192.0031, 7173.2787, 7352.9515, 7354.4786] +25-08-31 09:31:55 | D | - best error = [ 7075.6942, 7075.6942, 7075.6942, 7075.6942, 7075.6942] +25-08-31 09:31:55 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 09:31:55 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 09:31:55 | D | - sum error = [ 7173.6395, 7149.1474, 7139.2406, 7036.4153, 6959.3701] +25-08-31 09:31:55 | D | - best error = [ 7075.6942, 7075.6942, 7075.6942, 7036.4153, 6959.3701] +25-08-31 09:31:55 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 09:31:55 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 09:31:55 | D | - sum error = [ 6913.1781, 6895.1534, 6960.9445, 6907.7982, 6871.9924] +25-08-31 09:31:55 | D | - best error = [ 6913.1781, 6895.1534, 6895.1534, 6895.1534, 6871.9924] +25-08-31 09:31:55 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 09:31:55 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 09:31:55 | D | - sum error = [ 6814.2727, 6905.4657, 6986.1612, 7127.2261, 7230.5604] +25-08-31 09:31:55 | D | - best error = [ 6814.2727, 6814.2727, 6814.2727, 6814.2727, 6814.2727] +25-08-31 09:31:55 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:31:55 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 09:31:55 | D | - sum error = [ 7327.9992, 7397.6523, 7404.0872, 7387.5346] +25-08-31 09:31:55 | D | - best error = [ 6814.2727, 6814.2727, 6814.2727, 6814.2727] +25-08-31 09:31:55 | D | + error = 6814.2727 +25-08-31 09:31:55 | D | + scale = [min=1.8184, max=10.2003] +25-08-31 09:31:55 | D | - single_transformer_blocks.37.down_proj +25-08-31 09:31:55 | D | + w: sfp4_e2m1_all +25-08-31 09:31:55 | D | + x: sfp4_e2m1_all +25-08-31 09:31:55 | D | + y: None +25-08-31 09:31:55 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:31:55 | D | + finished parsing calibration arguments, ram usage: 14.0 +25-08-31 09:31:57 | D | + x - AbsMax +25-08-31 09:31:57 | D | + x = [min=2.0625, max=16.6250] +25-08-31 09:31:57 | D | + w - AbsMax +25-08-31 09:31:57 | D | + w = [min=0.0554, max=0.8086] +25-08-31 09:31:57 | D | + finished resetting calibrator, ram usage: 14.0 +25-08-31 09:31:59 | D | + finished calculating the original outputs, ram usage: 14.0 +25-08-31 09:33:52 | D | - x / w range = AbsMax / AbsMax +25-08-31 09:33:52 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-31 09:33:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:33:52 | D | - sum error = [17585.3248, 17164.1198, 16803.0512, 16433.4985, 16155.7965] +25-08-31 09:33:52 | D | - best error = [17585.3248, 17164.1198, 16803.0512, 16433.4985, 16155.7965] +25-08-31 09:33:52 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-31 09:33:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:33:52 | D | - sum error = [15909.9315, 15695.8838, 15433.0549, 15168.0454, 14994.2677] +25-08-31 09:33:52 | D | - best error = [15909.9315, 15695.8838, 15433.0549, 15168.0454, 14994.2677] +25-08-31 09:33:52 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-31 09:33:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:33:52 | D | - sum error = [14849.1885, 14598.4635, 14426.1429, 14281.1523, 14066.8200] +25-08-31 09:33:52 | D | - best error = [14849.1885, 14598.4635, 14426.1429, 14281.1523, 14066.8200] +25-08-31 09:33:52 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:33:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-31 09:33:52 | D | - sum error = [13935.3731, 13847.4050, 13680.3807, 13583.3763, 13510.3617] +25-08-31 09:33:52 | D | - best error = [13935.3731, 13847.4050, 13680.3807, 13583.3763, 13510.3617] +25-08-31 09:33:52 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-31 09:33:52 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-31 09:33:52 | D | - sum error = [11596.6568, 11507.1411, 11433.2938, 11441.4279, 11402.5092] +25-08-31 09:33:52 | D | - best error = [11596.6568, 11507.1411, 11433.2938, 11433.2938, 11402.5092] +25-08-31 09:33:52 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-31 09:33:52 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-31 09:33:52 | D | - sum error = [11441.2419, 11390.8995, 11439.0664, 11504.2392, 11537.3186] +25-08-31 09:33:52 | D | - best error = [11402.5092, 11390.8995, 11390.8995, 11390.8995, 11390.8995] +25-08-31 09:33:52 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-31 09:33:52 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-31 09:33:52 | D | - sum error = [11655.8559, 11744.1069, 11848.1013, 12264.5518, 12176.9625] +25-08-31 09:33:52 | D | - best error = [11390.8995, 11390.8995, 11390.8995, 11390.8995, 11390.8995] +25-08-31 09:33:52 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-31 09:33:52 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-31 09:33:52 | D | - sum error = [12352.4262, 12537.3062, 12967.1067, 13099.3887] +25-08-31 09:33:52 | D | - best error = [11390.8995, 11390.8995, 11390.8995, 11390.8995] +25-08-31 09:33:52 | D | + error = 11390.8995 +25-08-31 09:33:52 | D | + scale = [min=2.2721, max=13.6850] +25-08-31 09:33:53 | I | - Saving smooth scales to runs/diffusion/cache/quant/qdiff.128/smooth/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/w.skip.[e+rs+rtp+s+tpi+tpo]-x.skip.[e+rs+rtp+s+tan+tn+tpi+tpo]-y.skip.[]-w.include.[tan+tn]/lowrank.r32/smooth.proj.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.proj.[a.AbsMax.b.AbsMax]/smooth.proj.g20.bn2.lr.nf/smooth.proj.skip.[rc+tan+tn]/flux.1-dev-ghibli.pt +25-08-31 09:33:53 | I | - Linking smooth scales to runs/diffusion/flux.1/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000.RUNNING/run-250831.030550.RUNNING/model/smooth.pt +25-08-31 09:33:54 | I | * Quantizing weights +25-08-31 09:33:54 | I | - Generating weight settings +25-08-31 09:33:54 | I | - Generating branch settings +25-08-31 09:33:54 | I | * Adding low-rank branches to weights +25-08-31 09:34:51 | D | - Calibrating low-rank branches of block transformer_blocks.0 +25-08-31 09:34:51 | D | - Calibrating low-rank branch for transformer_blocks.0.attn.to_q, transformer_blocks.0.attn.to_k, transformer_blocks.0.attn.to_v +25-08-31 09:34:51 | D | + w: sfp4_e2m1_all +25-08-31 09:34:51 | D | + x: sfp4_e2m1_all +25-08-31 09:34:51 | D | + y: None +25-08-31 09:34:51 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:34:51 | D | + finished parsing calibration arguments, ram usage: 15.1 +25-08-31 09:34:51 | D | + finished resetting calibrator, ram usage: 15.1 +25-08-31 09:34:52 | D | + finished calculating the original outputs, ram usage: 15.1 +25-08-31 09:35:00 | D | - iter = [ 0, 1, 2, 3] +25-08-31 09:35:00 | D | - error = [ 240.9562, 236.1548, 229.4421, 229.7459] +25-08-31 09:35:00 | D | - best error = [ 240.9562, 236.1548, 229.4421, 229.4421] +25-08-31 09:35:00 | D | + Adding low-rank branches to transformer_blocks.0.attn.to_q, transformer_blocks.0.attn.to_k, transformer_blocks.0.attn.to_v +25-08-31 09:35:00 | D | - Calibrating low-rank branch for transformer_blocks.0.attn.add_q_proj, transformer_blocks.0.attn.add_k_proj, transformer_blocks.0.attn.add_v_proj +25-08-31 09:35:00 | D | + w: sfp4_e2m1_all +25-08-31 09:35:00 | D | + x: sfp4_e2m1_all +25-08-31 09:35:00 | D | + y: None +25-08-31 09:35:00 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:35:00 | D | + finished parsing calibration arguments, ram usage: 15.1 +25-08-31 09:35:00 | D | + finished resetting calibrator, ram usage: 15.1 +25-08-31 09:35:01 | D | + finished calculating the original outputs, ram usage: 15.1 +25-08-31 09:35:06 | D | - iter = [ 0, 1, 2] +25-08-31 09:35:06 | D | - error = [ 396.2177, 390.3568, 398.4821] +25-08-31 09:35:06 | D | - best error = [ 396.2177, 390.3568, 390.3568] +25-08-31 09:35:06 | D | + Adding low-rank branches to transformer_blocks.0.attn.add_q_proj, transformer_blocks.0.attn.add_k_proj, transformer_blocks.0.attn.add_v_proj +25-08-31 09:35:06 | D | - Calibrating low-rank branch for transformer_blocks.0.attn.to_out.0 +25-08-31 09:35:06 | D | + w: sfp4_e2m1_all +25-08-31 09:35:06 | D | + x: sfp4_e2m1_all +25-08-31 09:35:06 | D | + y: None +25-08-31 09:35:06 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:35:06 | D | + finished parsing calibration arguments, ram usage: 15.1 +25-08-31 09:35:06 | D | + finished resetting calibrator, ram usage: 15.1 +25-08-31 09:35:07 | D | + finished calculating the original outputs, ram usage: 15.1 +25-08-31 09:35:20 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 09:35:20 | D | - error = [ 1008.9810, 992.1614, 963.8599, 960.0847, 948.9462, 941.3188, 938.1778, 931.9412, 935.2863] +25-08-31 09:35:20 | D | - best error = [ 1008.9810, 992.1614, 963.8599, 960.0847, 948.9462, 941.3188, 938.1778, 931.9412, 931.9412] +25-08-31 09:35:20 | D | + Adding low-rank branches to transformer_blocks.0.attn.to_out.0 +25-08-31 09:35:20 | D | - Calibrating low-rank branch for transformer_blocks.0.attn.to_add_out +25-08-31 09:35:20 | D | + w: sfp4_e2m1_all +25-08-31 09:35:20 | D | + x: sfp4_e2m1_all +25-08-31 09:35:20 | D | + y: None +25-08-31 09:35:20 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:35:20 | D | + finished parsing calibration arguments, ram usage: 15.1 +25-08-31 09:35:20 | D | + finished resetting calibrator, ram usage: 15.1 +25-08-31 09:35:20 | D | + finished calculating the original outputs, ram usage: 15.1 +25-08-31 09:35:27 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-31 09:35:27 | D | - error = [ 2415.2412, 2399.2194, 2384.5206, 2369.0428, 2359.1399, 2351.4492, 2352.7687] +25-08-31 09:35:27 | D | - best error = [ 2415.2412, 2399.2194, 2384.5206, 2369.0428, 2359.1399, 2351.4492, 2351.4492] +25-08-31 09:35:28 | D | + Adding low-rank branches to transformer_blocks.0.attn.to_add_out +25-08-31 09:35:28 | D | - Calibrating low-rank branch for transformer_blocks.0.ff.net.0.proj +25-08-31 09:35:28 | D | + w: sfp4_e2m1_all +25-08-31 09:35:28 | D | + x: sfp4_e2m1_all +25-08-31 09:35:28 | D | + y: None +25-08-31 09:35:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:35:28 | D | + finished parsing calibration arguments, ram usage: 15.1 +25-08-31 09:35:28 | D | + finished resetting calibrator, ram usage: 15.1 +25-08-31 09:35:29 | D | + finished calculating the original outputs, ram usage: 15.1 +25-08-31 09:35:48 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:35:48 | D | - error = [ 541.7809, 536.0162, 532.7249, 531.2256, 529.3089, 528.1256, 526.9621, 526.7347, 525.9093, 525.8951] +25-08-31 09:35:48 | D | - best error = [ 541.7809, 536.0162, 532.7249, 531.2256, 529.3089, 528.1256, 526.9621, 526.7347, 525.9093, 525.8951] +25-08-31 09:35:55 | D | - iter = [ 10, 11, 12, 13] +25-08-31 09:35:55 | D | - error = [ 525.7812, 525.4088, 524.5717, 526.9679] +25-08-31 09:35:55 | D | - best error = [ 525.7812, 525.4088, 524.5717, 524.5717] +25-08-31 09:35:56 | D | + Adding low-rank branches to transformer_blocks.0.ff.net.0.proj +25-08-31 09:35:56 | D | - Calibrating low-rank branch for transformer_blocks.0.ff.net.2 +25-08-31 09:35:56 | D | + w: sfp4_e2m1_all +25-08-31 09:35:56 | D | + x: sfp4_e2m1_all +25-08-31 09:35:56 | D | + y: None +25-08-31 09:35:56 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:35:56 | D | + finished parsing calibration arguments, ram usage: 15.1 +25-08-31 09:35:56 | D | + finished resetting calibrator, ram usage: 15.1 +25-08-31 09:35:57 | D | + finished calculating the original outputs, ram usage: 15.1 +25-08-31 09:36:23 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:36:23 | D | - error = [ 1183.9149, 1156.2200, 1138.8838, 1125.8775, 1116.7767, 1109.4288, 1102.9346, 1097.9420, 1092.9469, 1088.7034] +25-08-31 09:36:23 | D | - best error = [ 1183.9149, 1156.2200, 1138.8838, 1125.8775, 1116.7767, 1109.4288, 1102.9346, 1097.9420, 1092.9469, 1088.7034] +25-08-31 09:36:49 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 09:36:49 | D | - error = [ 1084.6979, 1081.9099, 1080.4599, 1078.0508, 1075.3505, 1073.9079, 1072.3395, 1071.6937, 1070.3452, 1069.7871] +25-08-31 09:36:49 | D | - best error = [ 1084.6979, 1081.9099, 1080.4599, 1078.0508, 1075.3505, 1073.9079, 1072.3395, 1071.6937, 1070.3452, 1069.7871] +25-08-31 09:37:14 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 09:37:14 | D | - error = [ 1067.9326, 1067.2945, 1065.8448, 1064.9113, 1064.7675, 1063.1980, 1062.8027, 1062.3243, 1061.3297, 1061.1103] +25-08-31 09:37:14 | D | - best error = [ 1067.9326, 1067.2945, 1065.8448, 1064.9113, 1064.7675, 1063.1980, 1062.8027, 1062.3243, 1061.3297, 1061.1103] +25-08-31 09:37:40 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-31 09:37:40 | D | - error = [ 1060.4910, 1059.4057, 1059.1986, 1059.1116, 1058.3739, 1057.9761, 1057.8046, 1057.4121, 1056.8322, 1056.9168] +25-08-31 09:37:40 | D | - best error = [ 1060.4910, 1059.4057, 1059.1986, 1059.1116, 1058.3739, 1057.9761, 1057.8046, 1057.4121, 1056.8322, 1056.8322] +25-08-31 09:37:40 | D | + Adding low-rank branches to transformer_blocks.0.ff.net.2 +25-08-31 09:37:41 | D | - Calibrating low-rank branch for transformer_blocks.0.ff_context.net.0.proj +25-08-31 09:37:41 | D | + w: sfp4_e2m1_all +25-08-31 09:37:41 | D | + x: sfp4_e2m1_all +25-08-31 09:37:41 | D | + y: None +25-08-31 09:37:41 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:37:41 | D | + finished parsing calibration arguments, ram usage: 15.1 +25-08-31 09:37:41 | D | + finished resetting calibrator, ram usage: 15.1 +25-08-31 09:37:41 | D | + finished calculating the original outputs, ram usage: 15.1 +25-08-31 09:37:52 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:37:52 | D | - error = [ 1323.1733, 1296.4081, 1281.0504, 1267.7122, 1256.8203, 1253.9911, 1246.3749, 1239.5356, 1235.5879, 1233.9151] +25-08-31 09:37:52 | D | - best error = [ 1323.1733, 1296.4081, 1281.0504, 1267.7122, 1256.8203, 1253.9911, 1246.3749, 1239.5356, 1235.5879, 1233.9151] +25-08-31 09:37:57 | D | - iter = [ 10, 11, 12, 13] +25-08-31 09:37:57 | D | - error = [ 1229.5212, 1225.5412, 1225.1707, 1242.1446] +25-08-31 09:37:57 | D | - best error = [ 1229.5212, 1225.5412, 1225.1707, 1225.1707] +25-08-31 09:37:57 | D | + Adding low-rank branches to transformer_blocks.0.ff_context.net.0.proj +25-08-31 09:37:57 | D | - Calibrating low-rank branch for transformer_blocks.0.ff_context.net.2 +25-08-31 09:37:57 | D | + w: sfp4_e2m1_all +25-08-31 09:37:57 | D | + x: sfp4_e2m1_all +25-08-31 09:37:57 | D | + y: None +25-08-31 09:37:57 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:37:57 | D | + finished parsing calibration arguments, ram usage: 15.1 +25-08-31 09:37:57 | D | + finished resetting calibrator, ram usage: 15.1 +25-08-31 09:37:58 | D | + finished calculating the original outputs, ram usage: 15.1 +25-08-31 09:38:10 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:38:10 | D | - error = [ 489.4073, 486.5909, 486.5255, 483.4103, 481.7110, 481.6006, 481.0364, 480.6912, 479.4325, 478.3792] +25-08-31 09:38:10 | D | - best error = [ 489.4073, 486.5909, 486.5255, 483.4103, 481.7110, 481.6006, 481.0364, 480.6912, 479.4325, 478.3792] +25-08-31 09:38:12 | D | - iter = [ 10, 11] +25-08-31 09:38:12 | D | - error = [ 477.6375, 477.7407] +25-08-31 09:38:12 | D | - best error = [ 477.6375, 477.6375] +25-08-31 09:38:12 | D | + Adding low-rank branches to transformer_blocks.0.ff_context.net.2 +25-08-31 09:38:30 | D | - Calibrating low-rank branches of block transformer_blocks.1 +25-08-31 09:38:30 | D | - Calibrating low-rank branch for transformer_blocks.1.attn.to_q, transformer_blocks.1.attn.to_k, transformer_blocks.1.attn.to_v +25-08-31 09:38:30 | D | + w: sfp4_e2m1_all +25-08-31 09:38:30 | D | + x: sfp4_e2m1_all +25-08-31 09:38:30 | D | + y: None +25-08-31 09:38:30 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:38:30 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:38:30 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:38:31 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:38:42 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-31 09:38:42 | D | - error = [ 456.9366, 414.4999, 395.9114, 395.0250, 377.2071, 377.3771] +25-08-31 09:38:42 | D | - best error = [ 456.9366, 414.4999, 395.9114, 395.0250, 377.2071, 377.2071] +25-08-31 09:38:42 | D | + Adding low-rank branches to transformer_blocks.1.attn.to_q, transformer_blocks.1.attn.to_k, transformer_blocks.1.attn.to_v +25-08-31 09:38:42 | D | - Calibrating low-rank branch for transformer_blocks.1.attn.add_q_proj, transformer_blocks.1.attn.add_k_proj, transformer_blocks.1.attn.add_v_proj +25-08-31 09:38:42 | D | + w: sfp4_e2m1_all +25-08-31 09:38:42 | D | + x: sfp4_e2m1_all +25-08-31 09:38:42 | D | + y: None +25-08-31 09:38:42 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:38:42 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:38:42 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:38:43 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:38:57 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 09:38:57 | D | - error = [ 675.9295, 655.2018, 622.1589, 618.9089, 611.1711, 608.1172, 603.1373, 608.6630] +25-08-31 09:38:57 | D | - best error = [ 675.9295, 655.2018, 622.1589, 618.9089, 611.1711, 608.1172, 603.1373, 603.1373] +25-08-31 09:38:57 | D | + Adding low-rank branches to transformer_blocks.1.attn.add_q_proj, transformer_blocks.1.attn.add_k_proj, transformer_blocks.1.attn.add_v_proj +25-08-31 09:38:57 | D | - Calibrating low-rank branch for transformer_blocks.1.attn.to_out.0 +25-08-31 09:38:57 | D | + w: sfp4_e2m1_all +25-08-31 09:38:57 | D | + x: sfp4_e2m1_all +25-08-31 09:38:57 | D | + y: None +25-08-31 09:38:57 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:38:57 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:38:57 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:38:58 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:39:08 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-31 09:39:08 | D | - error = [ 1335.8794, 1299.6873, 1276.9116, 1260.0622, 1259.3896, 1245.8056, 1257.2120] +25-08-31 09:39:08 | D | - best error = [ 1335.8794, 1299.6873, 1276.9116, 1260.0622, 1259.3896, 1245.8056, 1245.8056] +25-08-31 09:39:08 | D | + Adding low-rank branches to transformer_blocks.1.attn.to_out.0 +25-08-31 09:39:08 | D | - Calibrating low-rank branch for transformer_blocks.1.attn.to_add_out +25-08-31 09:39:08 | D | + w: sfp4_e2m1_all +25-08-31 09:39:08 | D | + x: sfp4_e2m1_all +25-08-31 09:39:08 | D | + y: None +25-08-31 09:39:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:39:08 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:39:08 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:39:08 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:39:15 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-31 09:39:15 | D | - error = [ 1943.9905, 1931.3723, 1900.0804, 1896.2988, 1878.8202, 1871.6421, 1875.7727] +25-08-31 09:39:15 | D | - best error = [ 1943.9905, 1931.3723, 1900.0804, 1896.2988, 1878.8202, 1871.6421, 1871.6421] +25-08-31 09:39:16 | D | + Adding low-rank branches to transformer_blocks.1.attn.to_add_out +25-08-31 09:39:16 | D | - Calibrating low-rank branch for transformer_blocks.1.ff.net.0.proj +25-08-31 09:39:16 | D | + w: sfp4_e2m1_all +25-08-31 09:39:16 | D | + x: sfp4_e2m1_all +25-08-31 09:39:16 | D | + y: None +25-08-31 09:39:16 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:39:16 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:39:16 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:39:17 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:39:36 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:39:36 | D | - error = [ 1079.7254, 1057.4899, 1043.5198, 1036.6068, 1031.5611, 1026.8626, 1024.2001, 1022.7245, 1020.4845, 1018.7721] +25-08-31 09:39:36 | D | - best error = [ 1079.7254, 1057.4899, 1043.5198, 1036.6068, 1031.5611, 1026.8626, 1024.2001, 1022.7245, 1020.4845, 1018.7721] +25-08-31 09:39:55 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 09:39:55 | D | - error = [ 1018.4702, 1016.3954, 1015.6474, 1014.9546, 1014.1336, 1012.5972, 1012.0078, 1011.5279, 1011.1566, 1010.9123] +25-08-31 09:39:55 | D | - best error = [ 1018.4702, 1016.3954, 1015.6474, 1014.9546, 1014.1336, 1012.5972, 1012.0078, 1011.5279, 1011.1566, 1010.9123] +25-08-31 09:40:04 | D | - iter = [ 20, 21, 22, 23, 24] +25-08-31 09:40:04 | D | - error = [ 1010.2157, 1009.6201, 1009.0315, 1008.8631, 1008.9215] +25-08-31 09:40:04 | D | - best error = [ 1010.2157, 1009.6201, 1009.0315, 1008.8631, 1008.8631] +25-08-31 09:40:04 | D | + Adding low-rank branches to transformer_blocks.1.ff.net.0.proj +25-08-31 09:40:05 | D | - Calibrating low-rank branch for transformer_blocks.1.ff.net.2 +25-08-31 09:40:05 | D | + w: sfp4_e2m1_all +25-08-31 09:40:05 | D | + x: sfp4_e2m1_all +25-08-31 09:40:05 | D | + y: None +25-08-31 09:40:05 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:40:05 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:40:05 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:40:07 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:40:33 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:40:33 | D | - error = [ 2014.8602, 1964.8251, 1934.1628, 1911.8322, 1894.5870, 1879.6506, 1867.5567, 1859.7966, 1853.2625, 1847.2083] +25-08-31 09:40:33 | D | - best error = [ 2014.8602, 1964.8251, 1934.1628, 1911.8322, 1894.5870, 1879.6506, 1867.5567, 1859.7966, 1853.2625, 1847.2083] +25-08-31 09:40:58 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 09:40:58 | D | - error = [ 1841.0146, 1835.2961, 1832.2622, 1828.4461, 1825.5468, 1821.9379, 1819.8946, 1817.1467, 1815.6089, 1812.7542] +25-08-31 09:40:58 | D | - best error = [ 1841.0146, 1835.2961, 1832.2622, 1828.4461, 1825.5468, 1821.9379, 1819.8946, 1817.1467, 1815.6089, 1812.7542] +25-08-31 09:41:16 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26] +25-08-31 09:41:16 | D | - error = [ 1811.8333, 1809.1660, 1806.9878, 1805.1512, 1803.7554, 1802.4825, 1802.5084] +25-08-31 09:41:16 | D | - best error = [ 1811.8333, 1809.1660, 1806.9878, 1805.1512, 1803.7554, 1802.4825, 1802.4825] +25-08-31 09:41:16 | D | + Adding low-rank branches to transformer_blocks.1.ff.net.2 +25-08-31 09:41:17 | D | - Calibrating low-rank branch for transformer_blocks.1.ff_context.net.0.proj +25-08-31 09:41:17 | D | + w: sfp4_e2m1_all +25-08-31 09:41:17 | D | + x: sfp4_e2m1_all +25-08-31 09:41:17 | D | + y: None +25-08-31 09:41:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:41:17 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:41:17 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:41:17 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:41:23 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-31 09:41:23 | D | - error = [ 1827.1854, 1808.7196, 1789.9439, 1775.6716, 1777.4112] +25-08-31 09:41:23 | D | - best error = [ 1827.1854, 1808.7196, 1789.9439, 1775.6716, 1775.6716] +25-08-31 09:41:23 | D | + Adding low-rank branches to transformer_blocks.1.ff_context.net.0.proj +25-08-31 09:41:23 | D | - Calibrating low-rank branch for transformer_blocks.1.ff_context.net.2 +25-08-31 09:41:23 | D | + w: sfp4_e2m1_all +25-08-31 09:41:23 | D | + x: sfp4_e2m1_all +25-08-31 09:41:23 | D | + y: None +25-08-31 09:41:23 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:41:23 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:41:23 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:41:24 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:41:30 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-31 09:41:30 | D | - error = [ 547.1155, 545.0668, 542.2858, 540.4460, 542.0379] +25-08-31 09:41:30 | D | - best error = [ 547.1155, 545.0668, 542.2858, 540.4460, 540.4460] +25-08-31 09:41:30 | D | + Adding low-rank branches to transformer_blocks.1.ff_context.net.2 +25-08-31 09:41:47 | D | - Calibrating low-rank branches of block transformer_blocks.2 +25-08-31 09:41:47 | D | - Calibrating low-rank branch for transformer_blocks.2.attn.to_q, transformer_blocks.2.attn.to_k, transformer_blocks.2.attn.to_v +25-08-31 09:41:47 | D | + w: sfp4_e2m1_all +25-08-31 09:41:47 | D | + x: sfp4_e2m1_all +25-08-31 09:41:47 | D | + y: None +25-08-31 09:41:47 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:41:47 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:41:47 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:41:48 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:42:03 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 09:42:03 | D | - error = [ 922.5383, 850.8448, 834.9811, 826.1601, 787.3193, 775.7821, 756.3388, 774.1694] +25-08-31 09:42:03 | D | - best error = [ 922.5383, 850.8448, 834.9811, 826.1601, 787.3193, 775.7821, 756.3388, 756.3388] +25-08-31 09:42:03 | D | + Adding low-rank branches to transformer_blocks.2.attn.to_q, transformer_blocks.2.attn.to_k, transformer_blocks.2.attn.to_v +25-08-31 09:42:03 | D | - Calibrating low-rank branch for transformer_blocks.2.attn.add_q_proj, transformer_blocks.2.attn.add_k_proj, transformer_blocks.2.attn.add_v_proj +25-08-31 09:42:03 | D | + w: sfp4_e2m1_all +25-08-31 09:42:03 | D | + x: sfp4_e2m1_all +25-08-31 09:42:03 | D | + y: None +25-08-31 09:42:03 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:42:03 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:42:03 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:42:04 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:42:07 | D | - iter = [ 0, 1] +25-08-31 09:42:07 | D | - error = [ 360.7186, 366.2095] +25-08-31 09:42:07 | D | - best error = [ 360.7186, 360.7186] +25-08-31 09:42:07 | D | + Adding low-rank branches to transformer_blocks.2.attn.add_q_proj, transformer_blocks.2.attn.add_k_proj, transformer_blocks.2.attn.add_v_proj +25-08-31 09:42:08 | D | - Calibrating low-rank branch for transformer_blocks.2.attn.to_out.0 +25-08-31 09:42:08 | D | + w: sfp4_e2m1_all +25-08-31 09:42:08 | D | + x: sfp4_e2m1_all +25-08-31 09:42:08 | D | + y: None +25-08-31 09:42:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:42:08 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:42:08 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:42:09 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:42:20 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 09:42:20 | D | - error = [ 2176.6181, 2122.6483, 2088.0096, 2063.8465, 2046.1244, 2034.5270, 2026.8553, 2026.8709] +25-08-31 09:42:20 | D | - best error = [ 2176.6181, 2122.6483, 2088.0096, 2063.8465, 2046.1244, 2034.5270, 2026.8553, 2026.8553] +25-08-31 09:42:20 | D | + Adding low-rank branches to transformer_blocks.2.attn.to_out.0 +25-08-31 09:42:21 | D | - Calibrating low-rank branch for transformer_blocks.2.attn.to_add_out +25-08-31 09:42:21 | D | + w: sfp4_e2m1_all +25-08-31 09:42:21 | D | + x: sfp4_e2m1_all +25-08-31 09:42:21 | D | + y: None +25-08-31 09:42:21 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:42:21 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:42:21 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:42:21 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:42:29 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 09:42:29 | D | - error = [ 891.2544, 881.4543, 875.1436, 869.4937, 865.7622, 859.2617, 858.5693, 859.3735] +25-08-31 09:42:29 | D | - best error = [ 891.2544, 881.4543, 875.1436, 869.4937, 865.7622, 859.2617, 858.5693, 858.5693] +25-08-31 09:42:29 | D | + Adding low-rank branches to transformer_blocks.2.attn.to_add_out +25-08-31 09:42:29 | D | - Calibrating low-rank branch for transformer_blocks.2.ff.net.0.proj +25-08-31 09:42:29 | D | + w: sfp4_e2m1_all +25-08-31 09:42:29 | D | + x: sfp4_e2m1_all +25-08-31 09:42:29 | D | + y: None +25-08-31 09:42:29 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:42:29 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:42:29 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:42:30 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:42:49 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:42:49 | D | - error = [ 1031.1603, 1011.5756, 1002.6690, 996.5587, 992.7580, 989.2352, 987.0388, 984.9762, 983.2339, 982.5101] +25-08-31 09:42:49 | D | - best error = [ 1031.1603, 1011.5756, 1002.6690, 996.5587, 992.7580, 989.2352, 987.0388, 984.9762, 983.2339, 982.5101] +25-08-31 09:43:08 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 09:43:08 | D | - error = [ 981.3779, 980.5225, 979.7332, 979.1714, 978.6434, 978.0405, 977.7204, 976.7505, 976.1396, 975.7867] +25-08-31 09:43:08 | D | - best error = [ 981.3779, 980.5225, 979.7332, 979.1714, 978.6434, 978.0405, 977.7204, 976.7505, 976.1396, 975.7867] +25-08-31 09:43:20 | D | - iter = [ 20, 21, 22, 23, 24, 25] +25-08-31 09:43:20 | D | - error = [ 975.2090, 974.8233, 974.8076, 974.7285, 974.5202, 974.6371] +25-08-31 09:43:20 | D | - best error = [ 975.2090, 974.8233, 974.8076, 974.7285, 974.5202, 974.5202] +25-08-31 09:43:20 | D | + Adding low-rank branches to transformer_blocks.2.ff.net.0.proj +25-08-31 09:43:20 | D | - Calibrating low-rank branch for transformer_blocks.2.ff.net.2 +25-08-31 09:43:20 | D | + w: sfp4_e2m1_all +25-08-31 09:43:20 | D | + x: sfp4_e2m1_all +25-08-31 09:43:20 | D | + y: None +25-08-31 09:43:20 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:43:20 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:43:20 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:43:22 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:43:47 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:43:47 | D | - error = [ 1496.0570, 1462.3976, 1440.8434, 1425.5087, 1414.4465, 1404.8353, 1396.6032, 1390.4756, 1386.2223, 1381.9480] +25-08-31 09:43:47 | D | - best error = [ 1496.0570, 1462.3976, 1440.8434, 1425.5087, 1414.4465, 1404.8353, 1396.6032, 1390.4756, 1386.2223, 1381.9480] +25-08-31 09:44:05 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16] +25-08-31 09:44:05 | D | - error = [ 1377.8622, 1371.8212, 1366.0581, 1365.2351, 1364.9983, 1364.3046, 1364.9195] +25-08-31 09:44:05 | D | - best error = [ 1377.8622, 1371.8212, 1366.0581, 1365.2351, 1364.9983, 1364.3046, 1364.3046] +25-08-31 09:44:05 | D | + Adding low-rank branches to transformer_blocks.2.ff.net.2 +25-08-31 09:44:05 | D | - Calibrating low-rank branch for transformer_blocks.2.ff_context.net.0.proj +25-08-31 09:44:05 | D | + w: sfp4_e2m1_all +25-08-31 09:44:05 | D | + x: sfp4_e2m1_all +25-08-31 09:44:05 | D | + y: None +25-08-31 09:44:05 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:44:05 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:44:05 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:44:06 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:44:17 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:44:17 | D | - error = [ 1547.4968, 1514.7090, 1508.4184, 1482.5038, 1472.4666, 1461.0421, 1456.1235, 1449.7263, 1445.4253, 1440.3329] +25-08-31 09:44:17 | D | - best error = [ 1547.4968, 1514.7090, 1508.4184, 1482.5038, 1472.4666, 1461.0421, 1456.1235, 1449.7263, 1445.4253, 1440.3329] +25-08-31 09:44:20 | D | - iter = [ 10, 11, 12] +25-08-31 09:44:20 | D | - error = [ 1438.8438, 1435.9269, 1436.1426] +25-08-31 09:44:20 | D | - best error = [ 1438.8438, 1435.9269, 1435.9269] +25-08-31 09:44:21 | D | + Adding low-rank branches to transformer_blocks.2.ff_context.net.0.proj +25-08-31 09:44:21 | D | - Calibrating low-rank branch for transformer_blocks.2.ff_context.net.2 +25-08-31 09:44:21 | D | + w: sfp4_e2m1_all +25-08-31 09:44:21 | D | + x: sfp4_e2m1_all +25-08-31 09:44:21 | D | + y: None +25-08-31 09:44:21 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:44:21 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:44:21 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:44:22 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:44:34 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:44:34 | D | - error = [ 288.3144, 285.0588, 283.0327, 281.1825, 279.5440, 278.6002, 277.9030, 277.4851, 276.9621, 276.7801] +25-08-31 09:44:34 | D | - best error = [ 288.3144, 285.0588, 283.0327, 281.1825, 279.5440, 278.6002, 277.9030, 277.4851, 276.9621, 276.7801] +25-08-31 09:44:38 | D | - iter = [ 10, 11, 12, 13] +25-08-31 09:44:38 | D | - error = [ 276.3688, 276.3636, 275.4738, 275.8752] +25-08-31 09:44:38 | D | - best error = [ 276.3688, 276.3636, 275.4738, 275.4738] +25-08-31 09:44:38 | D | + Adding low-rank branches to transformer_blocks.2.ff_context.net.2 +25-08-31 09:44:55 | D | - Calibrating low-rank branches of block transformer_blocks.3 +25-08-31 09:44:55 | D | - Calibrating low-rank branch for transformer_blocks.3.attn.to_q, transformer_blocks.3.attn.to_k, transformer_blocks.3.attn.to_v +25-08-31 09:44:55 | D | + w: sfp4_e2m1_all +25-08-31 09:44:55 | D | + x: sfp4_e2m1_all +25-08-31 09:44:55 | D | + y: None +25-08-31 09:44:55 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:44:55 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:44:55 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:44:56 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:45:04 | D | - iter = [ 0, 1, 2, 3] +25-08-31 09:45:04 | D | - error = [ 1156.9892, 1102.4482, 1086.7267, 1090.7820] +25-08-31 09:45:04 | D | - best error = [ 1156.9892, 1102.4482, 1086.7267, 1086.7267] +25-08-31 09:45:04 | D | + Adding low-rank branches to transformer_blocks.3.attn.to_q, transformer_blocks.3.attn.to_k, transformer_blocks.3.attn.to_v +25-08-31 09:45:04 | D | - Calibrating low-rank branch for transformer_blocks.3.attn.add_q_proj, transformer_blocks.3.attn.add_k_proj, transformer_blocks.3.attn.add_v_proj +25-08-31 09:45:04 | D | + w: sfp4_e2m1_all +25-08-31 09:45:04 | D | + x: sfp4_e2m1_all +25-08-31 09:45:04 | D | + y: None +25-08-31 09:45:04 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:45:04 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:45:04 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:45:05 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:45:13 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-31 09:45:13 | D | - error = [ 558.0333, 535.9082, 522.0604, 509.4934, 510.3207] +25-08-31 09:45:13 | D | - best error = [ 558.0333, 535.9082, 522.0604, 509.4934, 509.4934] +25-08-31 09:45:14 | D | + Adding low-rank branches to transformer_blocks.3.attn.add_q_proj, transformer_blocks.3.attn.add_k_proj, transformer_blocks.3.attn.add_v_proj +25-08-31 09:45:14 | D | - Calibrating low-rank branch for transformer_blocks.3.attn.to_out.0 +25-08-31 09:45:14 | D | + w: sfp4_e2m1_all +25-08-31 09:45:14 | D | + x: sfp4_e2m1_all +25-08-31 09:45:14 | D | + y: None +25-08-31 09:45:14 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:45:14 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:45:14 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:45:14 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:45:23 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-31 09:45:23 | D | - error = [ 2428.7779, 2396.6087, 2358.9989, 2333.1466, 2311.3378, 2318.9124] +25-08-31 09:45:23 | D | - best error = [ 2428.7779, 2396.6087, 2358.9989, 2333.1466, 2311.3378, 2311.3378] +25-08-31 09:45:23 | D | + Adding low-rank branches to transformer_blocks.3.attn.to_out.0 +25-08-31 09:45:23 | D | - Calibrating low-rank branch for transformer_blocks.3.attn.to_add_out +25-08-31 09:45:23 | D | + w: sfp4_e2m1_all +25-08-31 09:45:23 | D | + x: sfp4_e2m1_all +25-08-31 09:45:23 | D | + y: None +25-08-31 09:45:23 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:45:23 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:45:23 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:45:24 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:45:29 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-31 09:45:29 | D | - error = [ 989.7123, 984.7914, 982.0675, 981.0943, 981.2304] +25-08-31 09:45:29 | D | - best error = [ 989.7123, 984.7914, 982.0675, 981.0943, 981.0943] +25-08-31 09:45:29 | D | + Adding low-rank branches to transformer_blocks.3.attn.to_add_out +25-08-31 09:45:29 | D | - Calibrating low-rank branch for transformer_blocks.3.ff.net.0.proj +25-08-31 09:45:29 | D | + w: sfp4_e2m1_all +25-08-31 09:45:29 | D | + x: sfp4_e2m1_all +25-08-31 09:45:29 | D | + y: None +25-08-31 09:45:29 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:45:29 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:45:29 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:45:30 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:45:49 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:45:49 | D | - error = [ 2055.3615, 2024.0459, 2008.3798, 1995.8781, 1988.9854, 1984.9196, 1980.0994, 1978.9976, 1976.4161, 1973.8506] +25-08-31 09:45:49 | D | - best error = [ 2055.3615, 2024.0459, 2008.3798, 1995.8781, 1988.9854, 1984.9196, 1980.0994, 1978.9976, 1976.4161, 1973.8506] +25-08-31 09:45:57 | D | - iter = [ 10, 11, 12, 13] +25-08-31 09:45:57 | D | - error = [ 1972.3124, 1969.3545, 1968.9024, 1969.8376] +25-08-31 09:45:57 | D | - best error = [ 1972.3124, 1969.3545, 1968.9024, 1968.9024] +25-08-31 09:45:57 | D | + Adding low-rank branches to transformer_blocks.3.ff.net.0.proj +25-08-31 09:45:57 | D | - Calibrating low-rank branch for transformer_blocks.3.ff.net.2 +25-08-31 09:45:57 | D | + w: sfp4_e2m1_all +25-08-31 09:45:57 | D | + x: sfp4_e2m1_all +25-08-31 09:45:57 | D | + y: None +25-08-31 09:45:57 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:45:57 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:45:57 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:46:00 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:46:26 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:46:26 | D | - error = [ 1690.8552, 1672.4465, 1660.0361, 1651.3548, 1644.8687, 1641.6185, 1637.7628, 1632.5940, 1629.7641, 1627.7574] +25-08-31 09:46:26 | D | - best error = [ 1690.8552, 1672.4465, 1660.0361, 1651.3548, 1644.8687, 1641.6185, 1637.7628, 1632.5940, 1629.7641, 1627.7574] +25-08-31 09:46:31 | D | - iter = [ 10, 11] +25-08-31 09:46:31 | D | - error = [ 1626.6987, 1639.8239] +25-08-31 09:46:31 | D | - best error = [ 1626.6987, 1626.6987] +25-08-31 09:46:32 | D | + Adding low-rank branches to transformer_blocks.3.ff.net.2 +25-08-31 09:46:32 | D | - Calibrating low-rank branch for transformer_blocks.3.ff_context.net.0.proj +25-08-31 09:46:32 | D | + w: sfp4_e2m1_all +25-08-31 09:46:32 | D | + x: sfp4_e2m1_all +25-08-31 09:46:32 | D | + y: None +25-08-31 09:46:32 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:46:32 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:46:32 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:46:32 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:46:42 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 09:46:42 | D | - error = [ 923.5050, 908.1451, 890.7578, 881.6600, 871.7472, 865.4784, 859.9579, 858.8457, 862.4335] +25-08-31 09:46:42 | D | - best error = [ 923.5050, 908.1451, 890.7578, 881.6600, 871.7472, 865.4784, 859.9579, 858.8457, 858.8457] +25-08-31 09:46:42 | D | + Adding low-rank branches to transformer_blocks.3.ff_context.net.0.proj +25-08-31 09:46:43 | D | - Calibrating low-rank branch for transformer_blocks.3.ff_context.net.2 +25-08-31 09:46:43 | D | + w: sfp4_e2m1_all +25-08-31 09:46:43 | D | + x: sfp4_e2m1_all +25-08-31 09:46:43 | D | + y: None +25-08-31 09:46:43 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:46:43 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:46:43 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:46:43 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:46:54 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 09:46:54 | D | - error = [ 244.6210, 241.7368, 239.8506, 239.0872, 237.9960, 237.4541, 237.1103, 236.6251, 237.7870] +25-08-31 09:46:54 | D | - best error = [ 244.6210, 241.7368, 239.8506, 239.0872, 237.9960, 237.4541, 237.1103, 236.6251, 236.6251] +25-08-31 09:46:54 | D | + Adding low-rank branches to transformer_blocks.3.ff_context.net.2 +25-08-31 09:47:11 | D | - Calibrating low-rank branches of block transformer_blocks.4 +25-08-31 09:47:11 | D | - Calibrating low-rank branch for transformer_blocks.4.attn.to_q, transformer_blocks.4.attn.to_k, transformer_blocks.4.attn.to_v +25-08-31 09:47:11 | D | + w: sfp4_e2m1_all +25-08-31 09:47:11 | D | + x: sfp4_e2m1_all +25-08-31 09:47:11 | D | + y: None +25-08-31 09:47:11 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:47:11 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:47:11 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:47:12 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:47:27 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 09:47:27 | D | - error = [ 1494.3248, 1440.3267, 1396.8068, 1384.2172, 1382.0984, 1364.3828, 1331.6912, 1338.7785] +25-08-31 09:47:27 | D | - best error = [ 1494.3248, 1440.3267, 1396.8068, 1384.2172, 1382.0984, 1364.3828, 1331.6912, 1331.6912] +25-08-31 09:47:27 | D | + Adding low-rank branches to transformer_blocks.4.attn.to_q, transformer_blocks.4.attn.to_k, transformer_blocks.4.attn.to_v +25-08-31 09:47:28 | D | - Calibrating low-rank branch for transformer_blocks.4.attn.add_q_proj, transformer_blocks.4.attn.add_k_proj, transformer_blocks.4.attn.add_v_proj +25-08-31 09:47:28 | D | + w: sfp4_e2m1_all +25-08-31 09:47:28 | D | + x: sfp4_e2m1_all +25-08-31 09:47:28 | D | + y: None +25-08-31 09:47:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:47:28 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:47:28 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:47:29 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:47:37 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-31 09:47:37 | D | - error = [ 577.0351, 571.3226, 552.5313, 548.3644, 556.5597] +25-08-31 09:47:37 | D | - best error = [ 577.0351, 571.3226, 552.5313, 548.3644, 548.3644] +25-08-31 09:47:37 | D | + Adding low-rank branches to transformer_blocks.4.attn.add_q_proj, transformer_blocks.4.attn.add_k_proj, transformer_blocks.4.attn.add_v_proj +25-08-31 09:47:37 | D | - Calibrating low-rank branch for transformer_blocks.4.attn.to_out.0 +25-08-31 09:47:37 | D | + w: sfp4_e2m1_all +25-08-31 09:47:37 | D | + x: sfp4_e2m1_all +25-08-31 09:47:37 | D | + y: None +25-08-31 09:47:37 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:47:37 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:47:37 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:47:38 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:47:52 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:47:52 | D | - error = [ 3114.6160, 3072.3779, 3047.6125, 3020.3930, 3004.0834, 2990.1679, 2980.5508, 2972.8838, 2965.8594, 2959.9885] +25-08-31 09:47:52 | D | - best error = [ 3114.6160, 3072.3779, 3047.6125, 3020.3930, 3004.0834, 2990.1679, 2980.5508, 2972.8838, 2965.8594, 2959.9885] +25-08-31 09:48:00 | D | - iter = [ 10, 11, 12, 13, 14, 15] +25-08-31 09:48:00 | D | - error = [ 2954.7815, 2950.7941, 2946.9725, 2943.5351, 2941.3186, 2953.2448] +25-08-31 09:48:00 | D | - best error = [ 2954.7815, 2950.7941, 2946.9725, 2943.5351, 2941.3186, 2941.3186] +25-08-31 09:48:01 | D | + Adding low-rank branches to transformer_blocks.4.attn.to_out.0 +25-08-31 09:48:01 | D | - Calibrating low-rank branch for transformer_blocks.4.attn.to_add_out +25-08-31 09:48:01 | D | + w: sfp4_e2m1_all +25-08-31 09:48:01 | D | + x: sfp4_e2m1_all +25-08-31 09:48:01 | D | + y: None +25-08-31 09:48:01 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:48:01 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:48:01 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:48:01 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:48:11 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:48:11 | D | - error = [ 876.9917, 870.9688, 866.1967, 863.2487, 858.1655, 857.3096, 853.6824, 852.3765, 851.4022, 848.9408] +25-08-31 09:48:11 | D | - best error = [ 876.9917, 870.9688, 866.1967, 863.2487, 858.1655, 857.3096, 853.6824, 852.3765, 851.4022, 848.9408] +25-08-31 09:48:13 | D | - iter = [ 10, 11] +25-08-31 09:48:13 | D | - error = [ 848.8730, 849.2253] +25-08-31 09:48:13 | D | - best error = [ 848.8730, 848.8730] +25-08-31 09:48:14 | D | + Adding low-rank branches to transformer_blocks.4.attn.to_add_out +25-08-31 09:48:14 | D | - Calibrating low-rank branch for transformer_blocks.4.ff.net.0.proj +25-08-31 09:48:14 | D | + w: sfp4_e2m1_all +25-08-31 09:48:14 | D | + x: sfp4_e2m1_all +25-08-31 09:48:14 | D | + y: None +25-08-31 09:48:14 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:48:14 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:48:14 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:48:15 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:48:30 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 09:48:30 | D | - error = [ 4513.4147, 4457.6958, 4425.6597, 4392.3172, 4367.8340, 4367.3470, 4345.1757, 4349.1215] +25-08-31 09:48:30 | D | - best error = [ 4513.4147, 4457.6958, 4425.6597, 4392.3172, 4367.8340, 4367.3470, 4345.1757, 4345.1757] +25-08-31 09:48:30 | D | + Adding low-rank branches to transformer_blocks.4.ff.net.0.proj +25-08-31 09:48:30 | D | - Calibrating low-rank branch for transformer_blocks.4.ff.net.2 +25-08-31 09:48:30 | D | + w: sfp4_e2m1_all +25-08-31 09:48:30 | D | + x: sfp4_e2m1_all +25-08-31 09:48:30 | D | + y: None +25-08-31 09:48:30 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:48:30 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:48:30 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:48:33 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:49:00 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:49:00 | D | - error = [ 3695.1520, 3670.9676, 3654.2381, 3643.2153, 3631.5110, 3622.4298, 3616.2259, 3610.9957, 3606.7695, 3602.4450] +25-08-31 09:49:00 | D | - best error = [ 3695.1520, 3670.9676, 3654.2381, 3643.2153, 3631.5110, 3622.4298, 3616.2259, 3610.9957, 3606.7695, 3602.4450] +25-08-31 09:49:26 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 09:49:26 | D | - error = [ 3599.0910, 3595.3727, 3592.8178, 3589.4261, 3587.3381, 3585.9781, 3584.5145, 3582.3231, 3581.3069, 3580.9567] +25-08-31 09:49:26 | D | - best error = [ 3599.0910, 3595.3727, 3592.8178, 3589.4261, 3587.3381, 3585.9781, 3584.5145, 3582.3231, 3581.3069, 3580.9567] +25-08-31 09:49:54 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 09:49:54 | D | - error = [ 3580.2857, 3580.2221, 3578.0618, 3576.7006, 3576.0326, 3573.0470, 3572.9512, 3571.6918, 3571.2033, 3569.0874] +25-08-31 09:49:54 | D | - best error = [ 3580.2857, 3580.2221, 3578.0618, 3576.7006, 3576.0326, 3573.0470, 3572.9512, 3571.6918, 3571.2033, 3569.0874] +25-08-31 09:50:20 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38] +25-08-31 09:50:20 | D | - error = [ 3568.0483, 3567.2213, 3566.9549, 3566.0745, 3564.3255, 3564.0340, 3563.0094, 3562.4079, 3562.7179] +25-08-31 09:50:20 | D | - best error = [ 3568.0483, 3567.2213, 3566.9549, 3566.0745, 3564.3255, 3564.0340, 3563.0094, 3562.4079, 3562.4079] +25-08-31 09:50:20 | D | + Adding low-rank branches to transformer_blocks.4.ff.net.2 +25-08-31 09:50:20 | D | - Calibrating low-rank branch for transformer_blocks.4.ff_context.net.0.proj +25-08-31 09:50:20 | D | + w: sfp4_e2m1_all +25-08-31 09:50:20 | D | + x: sfp4_e2m1_all +25-08-31 09:50:20 | D | + y: None +25-08-31 09:50:20 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:50:20 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:50:20 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:50:21 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:50:32 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:50:32 | D | - error = [ 899.9010, 876.2798, 861.5169, 858.1440, 843.6610, 835.0416, 831.0865, 827.5523, 824.2678, 821.2213] +25-08-31 09:50:32 | D | - best error = [ 899.9010, 876.2798, 861.5169, 858.1440, 843.6610, 835.0416, 831.0865, 827.5523, 824.2678, 821.2213] +25-08-31 09:50:40 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16] +25-08-31 09:50:40 | D | - error = [ 819.3421, 818.0298, 816.8837, 816.0067, 815.5202, 812.3242, 812.8196] +25-08-31 09:50:40 | D | - best error = [ 819.3421, 818.0298, 816.8837, 816.0067, 815.5202, 812.3242, 812.3242] +25-08-31 09:50:40 | D | + Adding low-rank branches to transformer_blocks.4.ff_context.net.0.proj +25-08-31 09:50:40 | D | - Calibrating low-rank branch for transformer_blocks.4.ff_context.net.2 +25-08-31 09:50:40 | D | + w: sfp4_e2m1_all +25-08-31 09:50:40 | D | + x: sfp4_e2m1_all +25-08-31 09:50:40 | D | + y: None +25-08-31 09:50:40 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:50:40 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:50:40 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:50:41 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:50:49 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-31 09:50:49 | D | - error = [ 261.1685, 258.7620, 257.0697, 255.8672, 254.8895, 255.0751] +25-08-31 09:50:49 | D | - best error = [ 261.1685, 258.7620, 257.0697, 255.8672, 254.8895, 254.8895] +25-08-31 09:50:49 | D | + Adding low-rank branches to transformer_blocks.4.ff_context.net.2 +25-08-31 09:51:06 | D | - Calibrating low-rank branches of block transformer_blocks.5 +25-08-31 09:51:06 | D | - Calibrating low-rank branch for transformer_blocks.5.attn.to_q, transformer_blocks.5.attn.to_k, transformer_blocks.5.attn.to_v +25-08-31 09:51:06 | D | + w: sfp4_e2m1_all +25-08-31 09:51:06 | D | + x: sfp4_e2m1_all +25-08-31 09:51:06 | D | + y: None +25-08-31 09:51:06 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:51:06 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:51:06 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:51:07 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:51:23 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 09:51:23 | D | - error = [ 1991.0359, 1957.1323, 1923.7396, 1919.9143, 1917.6321, 1904.2657, 1901.9150, 1923.6534] +25-08-31 09:51:23 | D | - best error = [ 1991.0359, 1957.1323, 1923.7396, 1919.9143, 1917.6321, 1904.2657, 1901.9150, 1901.9150] +25-08-31 09:51:23 | D | + Adding low-rank branches to transformer_blocks.5.attn.to_q, transformer_blocks.5.attn.to_k, transformer_blocks.5.attn.to_v +25-08-31 09:51:23 | D | - Calibrating low-rank branch for transformer_blocks.5.attn.add_q_proj, transformer_blocks.5.attn.add_k_proj, transformer_blocks.5.attn.add_v_proj +25-08-31 09:51:23 | D | + w: sfp4_e2m1_all +25-08-31 09:51:23 | D | + x: sfp4_e2m1_all +25-08-31 09:51:23 | D | + y: None +25-08-31 09:51:23 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:51:23 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:51:23 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:51:24 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:51:37 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 09:51:37 | D | - error = [ 385.8858, 377.6359, 369.0793, 364.8801, 363.6585, 362.1817, 361.3364, 361.5239] +25-08-31 09:51:37 | D | - best error = [ 385.8858, 377.6359, 369.0793, 364.8801, 363.6585, 362.1817, 361.3364, 361.3364] +25-08-31 09:51:37 | D | + Adding low-rank branches to transformer_blocks.5.attn.add_q_proj, transformer_blocks.5.attn.add_k_proj, transformer_blocks.5.attn.add_v_proj +25-08-31 09:51:37 | D | - Calibrating low-rank branch for transformer_blocks.5.attn.to_out.0 +25-08-31 09:51:37 | D | + w: sfp4_e2m1_all +25-08-31 09:51:37 | D | + x: sfp4_e2m1_all +25-08-31 09:51:37 | D | + y: None +25-08-31 09:51:37 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:51:37 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:51:37 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:51:39 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:51:53 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:51:53 | D | - error = [ 3273.8742, 3243.4599, 3218.7926, 3198.6028, 3183.2835, 3174.8962, 3165.5545, 3160.1147, 3153.1369, 3147.6479] +25-08-31 09:51:53 | D | - best error = [ 3273.8742, 3243.4599, 3218.7926, 3198.6028, 3183.2835, 3174.8962, 3165.5545, 3160.1147, 3153.1369, 3147.6479] +25-08-31 09:52:06 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18] +25-08-31 09:52:06 | D | - error = [ 3144.2048, 3142.7318, 3139.4412, 3139.2666, 3135.0802, 3131.2641, 3126.1974, 3125.2398, 3137.8295] +25-08-31 09:52:06 | D | - best error = [ 3144.2048, 3142.7318, 3139.4412, 3139.2666, 3135.0802, 3131.2641, 3126.1974, 3125.2398, 3125.2398] +25-08-31 09:52:06 | D | + Adding low-rank branches to transformer_blocks.5.attn.to_out.0 +25-08-31 09:52:06 | D | - Calibrating low-rank branch for transformer_blocks.5.attn.to_add_out +25-08-31 09:52:06 | D | + w: sfp4_e2m1_all +25-08-31 09:52:06 | D | + x: sfp4_e2m1_all +25-08-31 09:52:06 | D | + y: None +25-08-31 09:52:06 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:52:06 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:52:06 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:52:07 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:52:17 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:52:17 | D | - error = [ 925.4487, 916.9875, 911.4671, 908.1454, 903.3034, 900.5479, 897.6052, 895.2300, 892.9407, 892.0098] +25-08-31 09:52:17 | D | - best error = [ 925.4487, 916.9875, 911.4671, 908.1454, 903.3034, 900.5479, 897.6052, 895.2300, 892.9407, 892.0098] +25-08-31 09:52:25 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17] +25-08-31 09:52:25 | D | - error = [ 890.5045, 889.7188, 888.5314, 887.9125, 886.0156, 884.3267, 882.2279, 883.5301] +25-08-31 09:52:25 | D | - best error = [ 890.5045, 889.7188, 888.5314, 887.9125, 886.0156, 884.3267, 882.2279, 882.2279] +25-08-31 09:52:25 | D | + Adding low-rank branches to transformer_blocks.5.attn.to_add_out +25-08-31 09:52:25 | D | - Calibrating low-rank branch for transformer_blocks.5.ff.net.0.proj +25-08-31 09:52:25 | D | + w: sfp4_e2m1_all +25-08-31 09:52:25 | D | + x: sfp4_e2m1_all +25-08-31 09:52:25 | D | + y: None +25-08-31 09:52:25 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:52:25 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:52:25 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:52:27 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:52:47 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:52:47 | D | - error = [ 4897.3254, 4836.3602, 4781.0472, 4752.5164, 4715.4949, 4695.1552, 4682.8777, 4670.0739, 4663.3475, 4653.1250] +25-08-31 09:52:47 | D | - best error = [ 4897.3254, 4836.3602, 4781.0472, 4752.5164, 4715.4949, 4695.1552, 4682.8777, 4670.0739, 4663.3475, 4653.1250] +25-08-31 09:53:08 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 09:53:08 | D | - error = [ 4645.5129, 4640.5071, 4631.2925, 4626.3497, 4620.9406, 4617.0534, 4616.1222, 4612.5806, 4611.3237, 4608.0955] +25-08-31 09:53:08 | D | - best error = [ 4645.5129, 4640.5071, 4631.2925, 4626.3497, 4620.9406, 4617.0534, 4616.1222, 4612.5806, 4611.3237, 4608.0955] +25-08-31 09:53:28 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 09:53:28 | D | - error = [ 4607.5807, 4606.3276, 4602.0183, 4600.8982, 4598.9064, 4597.6026, 4594.5842, 4593.5180, 4592.7822, 4593.3787] +25-08-31 09:53:28 | D | - best error = [ 4607.5807, 4606.3276, 4602.0183, 4600.8982, 4598.9064, 4597.6026, 4594.5842, 4593.5180, 4592.7822, 4592.7822] +25-08-31 09:53:28 | D | + Adding low-rank branches to transformer_blocks.5.ff.net.0.proj +25-08-31 09:53:28 | D | - Calibrating low-rank branch for transformer_blocks.5.ff.net.2 +25-08-31 09:53:28 | D | + w: sfp4_e2m1_all +25-08-31 09:53:28 | D | + x: sfp4_e2m1_all +25-08-31 09:53:28 | D | + y: None +25-08-31 09:53:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:53:28 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:53:28 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:53:32 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:54:01 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:54:01 | D | - error = [ 2458.0480, 2440.9239, 2436.2872, 2426.3238, 2418.2592, 2413.1055, 2407.6941, 2403.3282, 2400.3143, 2397.0743] +25-08-31 09:54:01 | D | - best error = [ 2458.0480, 2440.9239, 2436.2872, 2426.3238, 2418.2592, 2413.1055, 2407.6941, 2403.3282, 2400.3143, 2397.0743] +25-08-31 09:54:16 | D | - iter = [ 10, 11, 12, 13, 14] +25-08-31 09:54:16 | D | - error = [ 2394.2966, 2391.7952, 2389.9134, 2387.9054, 2390.2591] +25-08-31 09:54:16 | D | - best error = [ 2394.2966, 2391.7952, 2389.9134, 2387.9054, 2387.9054] +25-08-31 09:54:16 | D | + Adding low-rank branches to transformer_blocks.5.ff.net.2 +25-08-31 09:54:16 | D | - Calibrating low-rank branch for transformer_blocks.5.ff_context.net.0.proj +25-08-31 09:54:16 | D | + w: sfp4_e2m1_all +25-08-31 09:54:16 | D | + x: sfp4_e2m1_all +25-08-31 09:54:16 | D | + y: None +25-08-31 09:54:16 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:54:16 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:54:16 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:54:17 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:54:21 | D | - iter = [ 0, 1, 2, 3] +25-08-31 09:54:21 | D | - error = [ 1691.3475, 1662.4618, 1646.9135, 1647.4567] +25-08-31 09:54:21 | D | - best error = [ 1691.3475, 1662.4618, 1646.9135, 1646.9135] +25-08-31 09:54:22 | D | + Adding low-rank branches to transformer_blocks.5.ff_context.net.0.proj +25-08-31 09:54:22 | D | - Calibrating low-rank branch for transformer_blocks.5.ff_context.net.2 +25-08-31 09:54:22 | D | + w: sfp4_e2m1_all +25-08-31 09:54:22 | D | + x: sfp4_e2m1_all +25-08-31 09:54:22 | D | + y: None +25-08-31 09:54:22 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:54:22 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:54:22 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:54:23 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:54:35 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:54:35 | D | - error = [ 324.3311, 321.2330, 318.9488, 317.3754, 316.1332, 315.5787, 314.7559, 313.7684, 313.2926, 313.0757] +25-08-31 09:54:35 | D | - best error = [ 324.3311, 321.2330, 318.9488, 317.3754, 316.1332, 315.5787, 314.7559, 313.7684, 313.2926, 313.0757] +25-08-31 09:54:45 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17] +25-08-31 09:54:45 | D | - error = [ 312.4139, 312.1213, 311.8733, 311.3613, 310.9927, 310.8955, 310.6962, 311.5741] +25-08-31 09:54:45 | D | - best error = [ 312.4139, 312.1213, 311.8733, 311.3613, 310.9927, 310.8955, 310.6962, 310.6962] +25-08-31 09:54:45 | D | + Adding low-rank branches to transformer_blocks.5.ff_context.net.2 +25-08-31 09:55:03 | D | - Calibrating low-rank branches of block transformer_blocks.6 +25-08-31 09:55:03 | D | - Calibrating low-rank branch for transformer_blocks.6.attn.to_q, transformer_blocks.6.attn.to_k, transformer_blocks.6.attn.to_v +25-08-31 09:55:03 | D | + w: sfp4_e2m1_all +25-08-31 09:55:03 | D | + x: sfp4_e2m1_all +25-08-31 09:55:03 | D | + y: None +25-08-31 09:55:03 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:55:03 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:55:03 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:55:04 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:55:21 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 09:55:21 | D | - error = [ 1700.5318, 1650.2010, 1609.1867, 1560.1088, 1553.5698, 1534.9875, 1533.0822, 1522.8491, 1526.4686] +25-08-31 09:55:21 | D | - best error = [ 1700.5318, 1650.2010, 1609.1867, 1560.1088, 1553.5698, 1534.9875, 1533.0822, 1522.8491, 1522.8491] +25-08-31 09:55:21 | D | + Adding low-rank branches to transformer_blocks.6.attn.to_q, transformer_blocks.6.attn.to_k, transformer_blocks.6.attn.to_v +25-08-31 09:55:21 | D | - Calibrating low-rank branch for transformer_blocks.6.attn.add_q_proj, transformer_blocks.6.attn.add_k_proj, transformer_blocks.6.attn.add_v_proj +25-08-31 09:55:21 | D | + w: sfp4_e2m1_all +25-08-31 09:55:21 | D | + x: sfp4_e2m1_all +25-08-31 09:55:21 | D | + y: None +25-08-31 09:55:21 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:55:21 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:55:21 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:55:22 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:55:39 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:55:39 | D | - error = [ 837.7929, 826.3771, 809.9016, 805.8949, 803.6594, 799.3379, 799.1830, 793.6136, 792.7375, 788.8816] +25-08-31 09:55:39 | D | - best error = [ 837.7929, 826.3771, 809.9016, 805.8949, 803.6594, 799.3379, 799.1830, 793.6136, 792.7375, 788.8816] +25-08-31 09:55:42 | D | - iter = [ 10, 11] +25-08-31 09:55:42 | D | - error = [ 787.1730, 790.3337] +25-08-31 09:55:42 | D | - best error = [ 787.1730, 787.1730] +25-08-31 09:55:43 | D | + Adding low-rank branches to transformer_blocks.6.attn.add_q_proj, transformer_blocks.6.attn.add_k_proj, transformer_blocks.6.attn.add_v_proj +25-08-31 09:55:43 | D | - Calibrating low-rank branch for transformer_blocks.6.attn.to_out.0 +25-08-31 09:55:43 | D | + w: sfp4_e2m1_all +25-08-31 09:55:43 | D | + x: sfp4_e2m1_all +25-08-31 09:55:43 | D | + y: None +25-08-31 09:55:43 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:55:43 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:55:43 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:55:44 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:55:59 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:55:59 | D | - error = [ 3839.3498, 3793.2146, 3759.6705, 3737.0439, 3722.4143, 3707.8159, 3698.0312, 3685.5880, 3677.2091, 3670.4319] +25-08-31 09:55:59 | D | - best error = [ 3839.3498, 3793.2146, 3759.6705, 3737.0439, 3722.4143, 3707.8159, 3698.0312, 3685.5880, 3677.2091, 3670.4319] +25-08-31 09:56:14 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 09:56:14 | D | - error = [ 3664.2309, 3662.0759, 3655.8556, 3653.1961, 3649.8949, 3646.5512, 3643.5599, 3641.6815, 3639.3079, 3638.7099] +25-08-31 09:56:14 | D | - best error = [ 3664.2309, 3662.0759, 3655.8556, 3653.1961, 3649.8949, 3646.5512, 3643.5599, 3641.6815, 3639.3079, 3638.7099] +25-08-31 09:56:15 | D | - iter = [ 20] +25-08-31 09:56:15 | D | - error = [ 3639.4162] +25-08-31 09:56:15 | D | - best error = [ 3638.7099] +25-08-31 09:56:15 | D | + Adding low-rank branches to transformer_blocks.6.attn.to_out.0 +25-08-31 09:56:16 | D | - Calibrating low-rank branch for transformer_blocks.6.attn.to_add_out +25-08-31 09:56:16 | D | + w: sfp4_e2m1_all +25-08-31 09:56:16 | D | + x: sfp4_e2m1_all +25-08-31 09:56:16 | D | + y: None +25-08-31 09:56:16 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:56:16 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:56:16 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:56:16 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:56:23 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-31 09:56:23 | D | - error = [ 1286.3616, 1267.3577, 1257.7260, 1249.1834, 1246.5746, 1244.7923, 1250.9605] +25-08-31 09:56:23 | D | - best error = [ 1286.3616, 1267.3577, 1257.7260, 1249.1834, 1246.5746, 1244.7923, 1244.7923] +25-08-31 09:56:24 | D | + Adding low-rank branches to transformer_blocks.6.attn.to_add_out +25-08-31 09:56:24 | D | - Calibrating low-rank branch for transformer_blocks.6.ff.net.0.proj +25-08-31 09:56:24 | D | + w: sfp4_e2m1_all +25-08-31 09:56:24 | D | + x: sfp4_e2m1_all +25-08-31 09:56:24 | D | + y: None +25-08-31 09:56:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:56:24 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:56:24 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:56:25 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:56:44 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 09:56:44 | D | - error = [ 5001.8389, 4924.7725, 4864.6463, 4852.4722, 4813.6265, 4789.5081, 4774.7905, 4758.7204, 4765.1829] +25-08-31 09:56:44 | D | - best error = [ 5001.8389, 4924.7725, 4864.6463, 4852.4722, 4813.6265, 4789.5081, 4774.7905, 4758.7204, 4758.7204] +25-08-31 09:56:44 | D | + Adding low-rank branches to transformer_blocks.6.ff.net.0.proj +25-08-31 09:56:44 | D | - Calibrating low-rank branch for transformer_blocks.6.ff.net.2 +25-08-31 09:56:44 | D | + w: sfp4_e2m1_all +25-08-31 09:56:44 | D | + x: sfp4_e2m1_all +25-08-31 09:56:44 | D | + y: None +25-08-31 09:56:44 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:56:44 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:56:44 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:56:48 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:57:18 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:57:18 | D | - error = [ 3462.1641, 3442.0241, 3427.8343, 3416.4141, 3406.0512, 3397.2227, 3390.7797, 3385.4472, 3380.6454, 3376.6403] +25-08-31 09:57:18 | D | - best error = [ 3462.1641, 3442.0241, 3427.8343, 3416.4141, 3406.0512, 3397.2227, 3390.7797, 3385.4472, 3380.6454, 3376.6403] +25-08-31 09:57:47 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 09:57:47 | D | - error = [ 3372.7238, 3369.2949, 3366.9654, 3364.6902, 3362.7356, 3361.2620, 3359.4642, 3357.4889, 3355.7367, 3355.3337] +25-08-31 09:57:47 | D | - best error = [ 3372.7238, 3369.2949, 3366.9654, 3364.6902, 3362.7356, 3361.2620, 3359.4642, 3357.4889, 3355.7367, 3355.3337] +25-08-31 09:58:03 | D | - iter = [ 20, 21, 22, 23, 24] +25-08-31 09:58:03 | D | - error = [ 3353.6580, 3352.1078, 3351.1162, 3350.0542, 3350.7393] +25-08-31 09:58:03 | D | - best error = [ 3353.6580, 3352.1078, 3351.1162, 3350.0542, 3350.0542] +25-08-31 09:58:03 | D | + Adding low-rank branches to transformer_blocks.6.ff.net.2 +25-08-31 09:58:03 | D | - Calibrating low-rank branch for transformer_blocks.6.ff_context.net.0.proj +25-08-31 09:58:03 | D | + w: sfp4_e2m1_all +25-08-31 09:58:03 | D | + x: sfp4_e2m1_all +25-08-31 09:58:03 | D | + y: None +25-08-31 09:58:03 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:58:03 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:58:03 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:58:04 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:58:14 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 09:58:14 | D | - error = [ 1067.1950, 1044.6190, 1025.5857, 1014.3196, 1004.9091, 998.0290, 995.6956, 990.6932, 1002.0161] +25-08-31 09:58:14 | D | - best error = [ 1067.1950, 1044.6190, 1025.5857, 1014.3196, 1004.9091, 998.0290, 995.6956, 990.6932, 990.6932] +25-08-31 09:58:14 | D | + Adding low-rank branches to transformer_blocks.6.ff_context.net.0.proj +25-08-31 09:58:14 | D | - Calibrating low-rank branch for transformer_blocks.6.ff_context.net.2 +25-08-31 09:58:14 | D | + w: sfp4_e2m1_all +25-08-31 09:58:14 | D | + x: sfp4_e2m1_all +25-08-31 09:58:14 | D | + y: None +25-08-31 09:58:14 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:58:14 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:58:14 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:58:15 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:58:28 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:58:28 | D | - error = [ 310.9120, 308.1778, 306.4420, 305.8343, 304.5846, 303.8135, 303.5185, 303.4000, 302.8292, 302.3976] +25-08-31 09:58:28 | D | - best error = [ 310.9120, 308.1778, 306.4420, 305.8343, 304.5846, 303.8135, 303.5185, 303.4000, 302.8292, 302.3976] +25-08-31 09:58:35 | D | - iter = [ 10, 11, 12, 13, 14, 15] +25-08-31 09:58:35 | D | - error = [ 302.2209, 301.7414, 301.2690, 301.0155, 300.9240, 301.0093] +25-08-31 09:58:35 | D | - best error = [ 302.2209, 301.7414, 301.2690, 301.0155, 300.9240, 300.9240] +25-08-31 09:58:35 | D | + Adding low-rank branches to transformer_blocks.6.ff_context.net.2 +25-08-31 09:58:53 | D | - Calibrating low-rank branches of block transformer_blocks.7 +25-08-31 09:58:53 | D | - Calibrating low-rank branch for transformer_blocks.7.attn.to_q, transformer_blocks.7.attn.to_k, transformer_blocks.7.attn.to_v +25-08-31 09:58:53 | D | + w: sfp4_e2m1_all +25-08-31 09:58:53 | D | + x: sfp4_e2m1_all +25-08-31 09:58:53 | D | + y: None +25-08-31 09:58:53 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:58:53 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:58:53 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:58:54 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:59:09 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 09:59:09 | D | - error = [ 2051.6709, 1993.9307, 1949.4803, 1914.7196, 1903.6894, 1899.6771, 1899.4393, 1907.5204] +25-08-31 09:59:09 | D | - best error = [ 2051.6709, 1993.9307, 1949.4803, 1914.7196, 1903.6894, 1899.6771, 1899.4393, 1899.4393] +25-08-31 09:59:09 | D | + Adding low-rank branches to transformer_blocks.7.attn.to_q, transformer_blocks.7.attn.to_k, transformer_blocks.7.attn.to_v +25-08-31 09:59:10 | D | - Calibrating low-rank branch for transformer_blocks.7.attn.add_q_proj, transformer_blocks.7.attn.add_k_proj, transformer_blocks.7.attn.add_v_proj +25-08-31 09:59:10 | D | + w: sfp4_e2m1_all +25-08-31 09:59:10 | D | + x: sfp4_e2m1_all +25-08-31 09:59:10 | D | + y: None +25-08-31 09:59:10 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:59:10 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:59:10 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:59:11 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:59:17 | D | - iter = [ 0, 1, 2, 3] +25-08-31 09:59:17 | D | - error = [ 618.9525, 593.6288, 582.6628, 586.5583] +25-08-31 09:59:17 | D | - best error = [ 618.9525, 593.6288, 582.6628, 582.6628] +25-08-31 09:59:17 | D | + Adding low-rank branches to transformer_blocks.7.attn.add_q_proj, transformer_blocks.7.attn.add_k_proj, transformer_blocks.7.attn.add_v_proj +25-08-31 09:59:18 | D | - Calibrating low-rank branch for transformer_blocks.7.attn.to_out.0 +25-08-31 09:59:18 | D | + w: sfp4_e2m1_all +25-08-31 09:59:18 | D | + x: sfp4_e2m1_all +25-08-31 09:59:18 | D | + y: None +25-08-31 09:59:18 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:59:18 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:59:18 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:59:19 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:59:33 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 09:59:33 | D | - error = [ 6098.3486, 6041.2936, 6001.6174, 5987.4181, 5955.4540, 5929.6054, 5912.2409, 5901.4768, 5890.7996, 5877.7666] +25-08-31 09:59:33 | D | - best error = [ 6098.3486, 6041.2936, 6001.6174, 5987.4181, 5955.4540, 5929.6054, 5912.2409, 5901.4768, 5890.7996, 5877.7666] +25-08-31 09:59:39 | D | - iter = [ 10, 11, 12, 13] +25-08-31 09:59:39 | D | - error = [ 5871.3201, 5863.8970, 5856.1776, 5860.8801] +25-08-31 09:59:39 | D | - best error = [ 5871.3201, 5863.8970, 5856.1776, 5856.1776] +25-08-31 09:59:39 | D | + Adding low-rank branches to transformer_blocks.7.attn.to_out.0 +25-08-31 09:59:39 | D | - Calibrating low-rank branch for transformer_blocks.7.attn.to_add_out +25-08-31 09:59:39 | D | + w: sfp4_e2m1_all +25-08-31 09:59:39 | D | + x: sfp4_e2m1_all +25-08-31 09:59:39 | D | + y: None +25-08-31 09:59:39 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:59:39 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:59:39 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:59:40 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 09:59:49 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 09:59:49 | D | - error = [ 1331.0408, 1311.5897, 1301.7187, 1297.7901, 1292.4453, 1286.5496, 1284.6743, 1282.4429, 1282.6421] +25-08-31 09:59:49 | D | - best error = [ 1331.0408, 1311.5897, 1301.7187, 1297.7901, 1292.4453, 1286.5496, 1284.6743, 1282.4429, 1282.4429] +25-08-31 09:59:49 | D | + Adding low-rank branches to transformer_blocks.7.attn.to_add_out +25-08-31 09:59:49 | D | - Calibrating low-rank branch for transformer_blocks.7.ff.net.0.proj +25-08-31 09:59:49 | D | + w: sfp4_e2m1_all +25-08-31 09:59:49 | D | + x: sfp4_e2m1_all +25-08-31 09:59:49 | D | + y: None +25-08-31 09:59:49 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 09:59:49 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 09:59:49 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 09:59:51 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:00:11 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:00:11 | D | - error = [ 5137.9002, 5070.5658, 5031.8808, 5000.4288, 4976.1700, 4956.3660, 4944.6761, 4931.6168, 4921.2990, 4911.9047] +25-08-31 10:00:11 | D | - best error = [ 5137.9002, 5070.5658, 5031.8808, 5000.4288, 4976.1700, 4956.3660, 4944.6761, 4931.6168, 4921.2990, 4911.9047] +25-08-31 10:00:31 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 10:00:31 | D | - error = [ 4906.8717, 4900.5377, 4894.4176, 4888.9473, 4884.2264, 4880.8782, 4877.3567, 4875.1527, 4873.4920, 4870.6018] +25-08-31 10:00:31 | D | - best error = [ 4906.8717, 4900.5377, 4894.4176, 4888.9473, 4884.2264, 4880.8782, 4877.3567, 4875.1527, 4873.4920, 4870.6018] +25-08-31 10:00:51 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 10:00:51 | D | - error = [ 4869.4320, 4867.1735, 4863.9264, 4862.4619, 4860.2078, 4858.3075, 4857.1747, 4855.0232, 4854.1100, 4852.7518] +25-08-31 10:00:51 | D | - best error = [ 4869.4320, 4867.1735, 4863.9264, 4862.4619, 4860.2078, 4858.3075, 4857.1747, 4855.0232, 4854.1100, 4852.7518] +25-08-31 10:01:12 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-31 10:01:12 | D | - error = [ 4851.0751, 4850.1478, 4847.0791, 4846.5256, 4846.1785, 4844.5035, 4842.8762, 4841.9358, 4841.3042, 4841.4157] +25-08-31 10:01:12 | D | - best error = [ 4851.0751, 4850.1478, 4847.0791, 4846.5256, 4846.1785, 4844.5035, 4842.8762, 4841.9358, 4841.3042, 4841.3042] +25-08-31 10:01:12 | D | + Adding low-rank branches to transformer_blocks.7.ff.net.0.proj +25-08-31 10:01:12 | D | - Calibrating low-rank branch for transformer_blocks.7.ff.net.2 +25-08-31 10:01:12 | D | + w: sfp4_e2m1_all +25-08-31 10:01:12 | D | + x: sfp4_e2m1_all +25-08-31 10:01:12 | D | + y: None +25-08-31 10:01:12 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:01:12 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:01:12 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:01:16 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:01:47 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:01:47 | D | - error = [ 4877.2349, 4848.3869, 4826.2310, 4810.2048, 4799.0416, 4788.7000, 4781.4291, 4774.3134, 4767.8763, 4762.7605] +25-08-31 10:01:47 | D | - best error = [ 4877.2349, 4848.3869, 4826.2310, 4810.2048, 4799.0416, 4788.7000, 4781.4291, 4774.3134, 4767.8763, 4762.7605] +25-08-31 10:02:16 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 10:02:16 | D | - error = [ 4757.8289, 4755.4371, 4752.4966, 4747.9665, 4744.0310, 4740.7539, 4736.0207, 4733.7843, 4731.3068, 4729.8670] +25-08-31 10:02:16 | D | - best error = [ 4757.8289, 4755.4371, 4752.4966, 4747.9665, 4744.0310, 4740.7539, 4736.0207, 4733.7843, 4731.3068, 4729.8670] +25-08-31 10:02:47 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 10:02:47 | D | - error = [ 4728.1956, 4725.1914, 4723.9086, 4721.8721, 4720.8525, 4718.4779, 4717.7762, 4716.6461, 4716.2871, 4715.0253] +25-08-31 10:02:47 | D | - best error = [ 4728.1956, 4725.1914, 4723.9086, 4721.8721, 4720.8525, 4718.4779, 4717.7762, 4716.6461, 4716.2871, 4715.0253] +25-08-31 10:03:13 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38] +25-08-31 10:03:13 | D | - error = [ 4713.3880, 4711.5350, 4710.1681, 4709.2590, 4708.3823, 4706.7968, 4705.0573, 4704.0723, 4704.0759] +25-08-31 10:03:13 | D | - best error = [ 4713.3880, 4711.5350, 4710.1681, 4709.2590, 4708.3823, 4706.7968, 4705.0573, 4704.0723, 4704.0723] +25-08-31 10:03:13 | D | + Adding low-rank branches to transformer_blocks.7.ff.net.2 +25-08-31 10:03:13 | D | - Calibrating low-rank branch for transformer_blocks.7.ff_context.net.0.proj +25-08-31 10:03:13 | D | + w: sfp4_e2m1_all +25-08-31 10:03:13 | D | + x: sfp4_e2m1_all +25-08-31 10:03:13 | D | + y: None +25-08-31 10:03:13 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:03:13 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:03:13 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:03:14 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:03:18 | D | - iter = [ 0, 1, 2, 3] +25-08-31 10:03:18 | D | - error = [ 1091.4182, 1065.3049, 1045.5403, 1047.4469] +25-08-31 10:03:18 | D | - best error = [ 1091.4182, 1065.3049, 1045.5403, 1045.5403] +25-08-31 10:03:19 | D | + Adding low-rank branches to transformer_blocks.7.ff_context.net.0.proj +25-08-31 10:03:19 | D | - Calibrating low-rank branch for transformer_blocks.7.ff_context.net.2 +25-08-31 10:03:19 | D | + w: sfp4_e2m1_all +25-08-31 10:03:19 | D | + x: sfp4_e2m1_all +25-08-31 10:03:19 | D | + y: None +25-08-31 10:03:19 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:03:19 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:03:19 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:03:20 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:03:32 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:03:32 | D | - error = [ 332.9257, 329.9071, 327.9888, 325.8338, 324.3972, 323.0900, 322.2081, 321.4711, 321.1679, 320.5686] +25-08-31 10:03:32 | D | - best error = [ 332.9257, 329.9071, 327.9888, 325.8338, 324.3972, 323.0900, 322.2081, 321.4711, 321.1679, 320.5686] +25-08-31 10:03:36 | D | - iter = [ 10, 11, 12] +25-08-31 10:03:36 | D | - error = [ 319.8213, 319.4384, 319.7034] +25-08-31 10:03:36 | D | - best error = [ 319.8213, 319.4384, 319.4384] +25-08-31 10:03:36 | D | + Adding low-rank branches to transformer_blocks.7.ff_context.net.2 +25-08-31 10:03:54 | D | - Calibrating low-rank branches of block transformer_blocks.8 +25-08-31 10:03:54 | D | - Calibrating low-rank branch for transformer_blocks.8.attn.to_q, transformer_blocks.8.attn.to_k, transformer_blocks.8.attn.to_v +25-08-31 10:03:54 | D | + w: sfp4_e2m1_all +25-08-31 10:03:54 | D | + x: sfp4_e2m1_all +25-08-31 10:03:54 | D | + y: None +25-08-31 10:03:54 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:03:54 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:03:54 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:03:55 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:04:06 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-31 10:04:06 | D | - error = [ 2111.1036, 1998.1984, 1983.9752, 1951.8817, 1947.8745, 1953.8193] +25-08-31 10:04:06 | D | - best error = [ 2111.1036, 1998.1984, 1983.9752, 1951.8817, 1947.8745, 1947.8745] +25-08-31 10:04:06 | D | + Adding low-rank branches to transformer_blocks.8.attn.to_q, transformer_blocks.8.attn.to_k, transformer_blocks.8.attn.to_v +25-08-31 10:04:07 | D | - Calibrating low-rank branch for transformer_blocks.8.attn.add_q_proj, transformer_blocks.8.attn.add_k_proj, transformer_blocks.8.attn.add_v_proj +25-08-31 10:04:07 | D | + w: sfp4_e2m1_all +25-08-31 10:04:07 | D | + x: sfp4_e2m1_all +25-08-31 10:04:07 | D | + y: None +25-08-31 10:04:07 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:04:07 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:04:07 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:04:08 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:04:19 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-31 10:04:19 | D | - error = [ 802.3943, 788.7721, 769.3222, 756.7064, 748.5804, 744.9815, 753.9822] +25-08-31 10:04:19 | D | - best error = [ 802.3943, 788.7721, 769.3222, 756.7064, 748.5804, 744.9815, 744.9815] +25-08-31 10:04:19 | D | + Adding low-rank branches to transformer_blocks.8.attn.add_q_proj, transformer_blocks.8.attn.add_k_proj, transformer_blocks.8.attn.add_v_proj +25-08-31 10:04:20 | D | - Calibrating low-rank branch for transformer_blocks.8.attn.to_out.0 +25-08-31 10:04:20 | D | + w: sfp4_e2m1_all +25-08-31 10:04:20 | D | + x: sfp4_e2m1_all +25-08-31 10:04:20 | D | + y: None +25-08-31 10:04:20 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:04:20 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:04:20 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:04:21 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:04:35 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:04:35 | D | - error = [ 8185.8659, 8119.4034, 8057.4445, 8019.4768, 7983.9108, 7957.7316, 7932.9001, 7911.2578, 7894.4527, 7879.3896] +25-08-31 10:04:35 | D | - best error = [ 8185.8659, 8119.4034, 8057.4445, 8019.4768, 7983.9108, 7957.7316, 7932.9001, 7911.2578, 7894.4527, 7879.3896] +25-08-31 10:04:47 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17] +25-08-31 10:04:47 | D | - error = [ 7862.8689, 7850.5085, 7837.8900, 7833.5877, 7825.6936, 7820.4910, 7817.9936, 7824.9156] +25-08-31 10:04:47 | D | - best error = [ 7862.8689, 7850.5085, 7837.8900, 7833.5877, 7825.6936, 7820.4910, 7817.9936, 7817.9936] +25-08-31 10:04:47 | D | + Adding low-rank branches to transformer_blocks.8.attn.to_out.0 +25-08-31 10:04:47 | D | - Calibrating low-rank branch for transformer_blocks.8.attn.to_add_out +25-08-31 10:04:47 | D | + w: sfp4_e2m1_all +25-08-31 10:04:47 | D | + x: sfp4_e2m1_all +25-08-31 10:04:47 | D | + y: None +25-08-31 10:04:47 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:04:47 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:04:47 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:04:48 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:04:58 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:04:58 | D | - error = [ 1642.7104, 1627.7785, 1619.6392, 1605.2307, 1599.9075, 1595.9005, 1590.8544, 1588.6542, 1583.5482, 1580.9490] +25-08-31 10:04:58 | D | - best error = [ 1642.7104, 1627.7785, 1619.6392, 1605.2307, 1599.9075, 1595.9005, 1590.8544, 1588.6542, 1583.5482, 1580.9490] +25-08-31 10:05:04 | D | - iter = [ 10, 11, 12, 13, 14, 15] +25-08-31 10:05:04 | D | - error = [ 1576.5842, 1572.2986, 1568.4581, 1566.5273, 1563.6991, 1564.6338] +25-08-31 10:05:04 | D | - best error = [ 1576.5842, 1572.2986, 1568.4581, 1566.5273, 1563.6991, 1563.6991] +25-08-31 10:05:04 | D | + Adding low-rank branches to transformer_blocks.8.attn.to_add_out +25-08-31 10:05:04 | D | - Calibrating low-rank branch for transformer_blocks.8.ff.net.0.proj +25-08-31 10:05:04 | D | + w: sfp4_e2m1_all +25-08-31 10:05:04 | D | + x: sfp4_e2m1_all +25-08-31 10:05:04 | D | + y: None +25-08-31 10:05:04 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:05:04 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:05:04 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:05:06 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:05:24 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 10:05:24 | D | - error = [ 5225.6536, 5150.5262, 5101.9945, 5064.2415, 5039.1698, 5020.1486, 5003.5413, 4990.2465, 4999.7670] +25-08-31 10:05:24 | D | - best error = [ 5225.6536, 5150.5262, 5101.9945, 5064.2415, 5039.1698, 5020.1486, 5003.5413, 4990.2465, 4990.2465] +25-08-31 10:05:24 | D | + Adding low-rank branches to transformer_blocks.8.ff.net.0.proj +25-08-31 10:05:24 | D | - Calibrating low-rank branch for transformer_blocks.8.ff.net.2 +25-08-31 10:05:24 | D | + w: sfp4_e2m1_all +25-08-31 10:05:24 | D | + x: sfp4_e2m1_all +25-08-31 10:05:24 | D | + y: None +25-08-31 10:05:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:05:24 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:05:24 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:05:29 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:05:58 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:05:58 | D | - error = [ 4367.6094, 4344.0838, 4327.1878, 4312.9850, 4300.5574, 4291.9863, 4288.2675, 4278.2634, 4270.9723, 4266.3565] +25-08-31 10:05:58 | D | - best error = [ 4367.6094, 4344.0838, 4327.1878, 4312.9850, 4300.5574, 4291.9863, 4288.2675, 4278.2634, 4270.9723, 4266.3565] +25-08-31 10:06:19 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16] +25-08-31 10:06:19 | D | - error = [ 4258.9151, 4252.5216, 4247.7891, 4243.2155, 4240.6037, 4236.7105, 4246.4988] +25-08-31 10:06:19 | D | - best error = [ 4258.9151, 4252.5216, 4247.7891, 4243.2155, 4240.6037, 4236.7105, 4236.7105] +25-08-31 10:06:19 | D | + Adding low-rank branches to transformer_blocks.8.ff.net.2 +25-08-31 10:06:20 | D | - Calibrating low-rank branch for transformer_blocks.8.ff_context.net.0.proj +25-08-31 10:06:20 | D | + w: sfp4_e2m1_all +25-08-31 10:06:20 | D | + x: sfp4_e2m1_all +25-08-31 10:06:20 | D | + y: None +25-08-31 10:06:20 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:06:20 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:06:20 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:06:20 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:06:31 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:06:31 | D | - error = [ 1130.7051, 1101.8113, 1081.9521, 1068.1887, 1058.0129, 1052.7361, 1047.9023, 1043.0455, 1039.4758, 1036.3058] +25-08-31 10:06:31 | D | - best error = [ 1130.7051, 1101.8113, 1081.9521, 1068.1887, 1058.0129, 1052.7361, 1047.9023, 1043.0455, 1039.4758, 1036.3058] +25-08-31 10:06:35 | D | - iter = [ 10, 11, 12] +25-08-31 10:06:35 | D | - error = [ 1033.2472, 1029.2010, 1029.7613] +25-08-31 10:06:35 | D | - best error = [ 1033.2472, 1029.2010, 1029.2010] +25-08-31 10:06:35 | D | + Adding low-rank branches to transformer_blocks.8.ff_context.net.0.proj +25-08-31 10:06:35 | D | - Calibrating low-rank branch for transformer_blocks.8.ff_context.net.2 +25-08-31 10:06:35 | D | + w: sfp4_e2m1_all +25-08-31 10:06:35 | D | + x: sfp4_e2m1_all +25-08-31 10:06:35 | D | + y: None +25-08-31 10:06:35 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:06:35 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:06:35 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:06:36 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:06:48 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:06:48 | D | - error = [ 293.0445, 290.8126, 289.2611, 288.5005, 287.7310, 286.3530, 285.6376, 284.8518, 284.2509, 283.8680] +25-08-31 10:06:48 | D | - best error = [ 293.0445, 290.8126, 289.2611, 288.5005, 287.7310, 286.3530, 285.6376, 284.8518, 284.2509, 283.8680] +25-08-31 10:06:56 | D | - iter = [ 10, 11, 12, 13, 14, 15] +25-08-31 10:06:56 | D | - error = [ 283.5983, 283.2819, 283.2393, 283.0097, 282.9574, 282.9826] +25-08-31 10:06:56 | D | - best error = [ 283.5983, 283.2819, 283.2393, 283.0097, 282.9574, 282.9574] +25-08-31 10:06:56 | D | + Adding low-rank branches to transformer_blocks.8.ff_context.net.2 +25-08-31 10:07:14 | D | - Calibrating low-rank branches of block transformer_blocks.9 +25-08-31 10:07:14 | D | - Calibrating low-rank branch for transformer_blocks.9.attn.to_q, transformer_blocks.9.attn.to_k, transformer_blocks.9.attn.to_v +25-08-31 10:07:14 | D | + w: sfp4_e2m1_all +25-08-31 10:07:14 | D | + x: sfp4_e2m1_all +25-08-31 10:07:14 | D | + y: None +25-08-31 10:07:14 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:07:14 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:07:14 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:07:15 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:07:29 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-31 10:07:29 | D | - error = [ 2501.0597, 2474.3707, 2459.8709, 2447.9436, 2429.0618, 2421.0797, 2430.3668] +25-08-31 10:07:29 | D | - best error = [ 2501.0597, 2474.3707, 2459.8709, 2447.9436, 2429.0618, 2421.0797, 2421.0797] +25-08-31 10:07:29 | D | + Adding low-rank branches to transformer_blocks.9.attn.to_q, transformer_blocks.9.attn.to_k, transformer_blocks.9.attn.to_v +25-08-31 10:07:29 | D | - Calibrating low-rank branch for transformer_blocks.9.attn.add_q_proj, transformer_blocks.9.attn.add_k_proj, transformer_blocks.9.attn.add_v_proj +25-08-31 10:07:29 | D | + w: sfp4_e2m1_all +25-08-31 10:07:29 | D | + x: sfp4_e2m1_all +25-08-31 10:07:29 | D | + y: None +25-08-31 10:07:29 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:07:29 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:07:29 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:07:30 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:07:38 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-31 10:07:38 | D | - error = [ 724.0293, 694.2940, 680.6892, 675.7110, 679.2830] +25-08-31 10:07:38 | D | - best error = [ 724.0293, 694.2940, 680.6892, 675.7110, 675.7110] +25-08-31 10:07:38 | D | + Adding low-rank branches to transformer_blocks.9.attn.add_q_proj, transformer_blocks.9.attn.add_k_proj, transformer_blocks.9.attn.add_v_proj +25-08-31 10:07:39 | D | - Calibrating low-rank branch for transformer_blocks.9.attn.to_out.0 +25-08-31 10:07:39 | D | + w: sfp4_e2m1_all +25-08-31 10:07:39 | D | + x: sfp4_e2m1_all +25-08-31 10:07:39 | D | + y: None +25-08-31 10:07:39 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:07:39 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:07:39 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:07:40 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:07:54 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:07:54 | D | - error = [ 5232.4373, 5181.8659, 5144.8658, 5127.4816, 5111.9169, 5092.8098, 5086.7699, 5077.4050, 5067.3200, 5062.5482] +25-08-31 10:07:54 | D | - best error = [ 5232.4373, 5181.8659, 5144.8658, 5127.4816, 5111.9169, 5092.8098, 5086.7699, 5077.4050, 5067.3200, 5062.5482] +25-08-31 10:08:05 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16] +25-08-31 10:08:05 | D | - error = [ 5052.6684, 5052.5077, 5045.2181, 5037.2223, 5032.7782, 5029.0838, 5031.2052] +25-08-31 10:08:05 | D | - best error = [ 5052.6684, 5052.5077, 5045.2181, 5037.2223, 5032.7782, 5029.0838, 5029.0838] +25-08-31 10:08:05 | D | + Adding low-rank branches to transformer_blocks.9.attn.to_out.0 +25-08-31 10:08:05 | D | - Calibrating low-rank branch for transformer_blocks.9.attn.to_add_out +25-08-31 10:08:05 | D | + w: sfp4_e2m1_all +25-08-31 10:08:05 | D | + x: sfp4_e2m1_all +25-08-31 10:08:05 | D | + y: None +25-08-31 10:08:05 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:08:05 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:08:05 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:08:05 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:08:16 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:08:16 | D | - error = [ 1344.4741, 1328.3914, 1319.9991, 1309.9887, 1305.6759, 1298.9953, 1296.0963, 1293.4411, 1290.2902, 1287.0269] +25-08-31 10:08:16 | D | - best error = [ 1344.4741, 1328.3914, 1319.9991, 1309.9887, 1305.6759, 1298.9953, 1296.0963, 1293.4411, 1290.2902, 1287.0269] +25-08-31 10:08:18 | D | - iter = [ 10, 11] +25-08-31 10:08:18 | D | - error = [ 1285.6275, 1285.9960] +25-08-31 10:08:18 | D | - best error = [ 1285.6275, 1285.6275] +25-08-31 10:08:18 | D | + Adding low-rank branches to transformer_blocks.9.attn.to_add_out +25-08-31 10:08:18 | D | - Calibrating low-rank branch for transformer_blocks.9.ff.net.0.proj +25-08-31 10:08:18 | D | + w: sfp4_e2m1_all +25-08-31 10:08:18 | D | + x: sfp4_e2m1_all +25-08-31 10:08:18 | D | + y: None +25-08-31 10:08:18 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:08:18 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:08:18 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:08:20 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:08:40 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:08:40 | D | - error = [ 5552.7485, 5475.6695, 5424.6991, 5390.8385, 5367.1620, 5348.6912, 5331.5911, 5317.5840, 5307.6245, 5297.8960] +25-08-31 10:08:40 | D | - best error = [ 5552.7485, 5475.6695, 5424.6991, 5390.8385, 5367.1620, 5348.6912, 5331.5911, 5317.5840, 5307.6245, 5297.8960] +25-08-31 10:09:00 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 10:09:00 | D | - error = [ 5289.6236, 5284.1359, 5278.0269, 5271.5584, 5266.8355, 5263.7454, 5258.3442, 5254.7487, 5251.1806, 5248.3845] +25-08-31 10:09:00 | D | - best error = [ 5289.6236, 5284.1359, 5278.0269, 5271.5584, 5266.8355, 5263.7454, 5258.3442, 5254.7487, 5251.1806, 5248.3845] +25-08-31 10:09:20 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 10:09:20 | D | - error = [ 5245.9020, 5243.0329, 5241.8344, 5239.4962, 5235.9945, 5234.4216, 5232.1260, 5231.2060, 5230.1191, 5227.3389] +25-08-31 10:09:20 | D | - best error = [ 5245.9020, 5243.0329, 5241.8344, 5239.4962, 5235.9945, 5234.4216, 5232.1260, 5231.2060, 5230.1191, 5227.3389] +25-08-31 10:09:41 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-31 10:09:41 | D | - error = [ 5225.0497, 5223.6805, 5221.9483, 5221.9460, 5221.5703, 5220.3383, 5218.5538, 5217.4639, 5216.2830, 5215.0260] +25-08-31 10:09:41 | D | - best error = [ 5225.0497, 5223.6805, 5221.9483, 5221.9460, 5221.5703, 5220.3383, 5218.5538, 5217.4639, 5216.2830, 5215.0260] +25-08-31 10:09:45 | D | - iter = [ 40, 41] +25-08-31 10:09:45 | D | - error = [ 5214.3011, 5214.6218] +25-08-31 10:09:45 | D | - best error = [ 5214.3011, 5214.3011] +25-08-31 10:09:45 | D | + Adding low-rank branches to transformer_blocks.9.ff.net.0.proj +25-08-31 10:09:45 | D | - Calibrating low-rank branch for transformer_blocks.9.ff.net.2 +25-08-31 10:09:45 | D | + w: sfp4_e2m1_all +25-08-31 10:09:45 | D | + x: sfp4_e2m1_all +25-08-31 10:09:45 | D | + y: None +25-08-31 10:09:45 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:09:45 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:09:45 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:09:50 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:10:19 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:10:19 | D | - error = [ 4947.5275, 4918.4491, 4897.6458, 4881.1265, 4869.7378, 4860.8477, 4851.8813, 4844.3342, 4838.2344, 4832.6759] +25-08-31 10:10:19 | D | - best error = [ 4947.5275, 4918.4491, 4897.6458, 4881.1265, 4869.7378, 4860.8477, 4851.8813, 4844.3342, 4838.2344, 4832.6759] +25-08-31 10:10:43 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17] +25-08-31 10:10:43 | D | - error = [ 4827.5292, 4824.3613, 4819.1947, 4814.5367, 4809.8524, 4808.2760, 4805.8471, 4812.4090] +25-08-31 10:10:43 | D | - best error = [ 4827.5292, 4824.3613, 4819.1947, 4814.5367, 4809.8524, 4808.2760, 4805.8471, 4805.8471] +25-08-31 10:10:44 | D | + Adding low-rank branches to transformer_blocks.9.ff.net.2 +25-08-31 10:10:44 | D | - Calibrating low-rank branch for transformer_blocks.9.ff_context.net.0.proj +25-08-31 10:10:44 | D | + w: sfp4_e2m1_all +25-08-31 10:10:44 | D | + x: sfp4_e2m1_all +25-08-31 10:10:44 | D | + y: None +25-08-31 10:10:44 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:10:44 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:10:44 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:10:44 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:10:56 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:10:56 | D | - error = [ 1245.8819, 1220.2594, 1201.8830, 1188.5842, 1180.8480, 1172.0865, 1166.8342, 1160.6646, 1157.0441, 1153.0920] +25-08-31 10:10:56 | D | - best error = [ 1245.8819, 1220.2594, 1201.8830, 1188.5842, 1180.8480, 1172.0865, 1166.8342, 1160.6646, 1157.0441, 1153.0920] +25-08-31 10:10:57 | D | - iter = [ 10] +25-08-31 10:10:57 | D | - error = [ 1153.1927] +25-08-31 10:10:57 | D | - best error = [ 1153.0920] +25-08-31 10:10:57 | D | + Adding low-rank branches to transformer_blocks.9.ff_context.net.0.proj +25-08-31 10:10:57 | D | - Calibrating low-rank branch for transformer_blocks.9.ff_context.net.2 +25-08-31 10:10:57 | D | + w: sfp4_e2m1_all +25-08-31 10:10:57 | D | + x: sfp4_e2m1_all +25-08-31 10:10:57 | D | + y: None +25-08-31 10:10:57 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:10:57 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:10:57 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:10:58 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:11:10 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:11:10 | D | - error = [ 374.1870, 370.9923, 368.5715, 366.4178, 365.0923, 363.7384, 363.6586, 362.6224, 361.3944, 360.5341] +25-08-31 10:11:10 | D | - best error = [ 374.1870, 370.9923, 368.5715, 366.4178, 365.0923, 363.7384, 363.6586, 362.6224, 361.3944, 360.5341] +25-08-31 10:11:23 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 10:11:23 | D | - error = [ 359.8994, 359.4850, 359.0496, 358.8159, 358.4567, 357.9782, 357.5067, 357.3406, 356.7975, 356.7156] +25-08-31 10:11:23 | D | - best error = [ 359.8994, 359.4850, 359.0496, 358.8159, 358.4567, 357.9782, 357.5067, 357.3406, 356.7975, 356.7156] +25-08-31 10:11:26 | D | - iter = [ 20, 21, 22] +25-08-31 10:11:26 | D | - error = [ 356.3832, 356.1127, 356.2652] +25-08-31 10:11:26 | D | - best error = [ 356.3832, 356.1127, 356.1127] +25-08-31 10:11:27 | D | + Adding low-rank branches to transformer_blocks.9.ff_context.net.2 +25-08-31 10:11:44 | D | - Calibrating low-rank branches of block transformer_blocks.10 +25-08-31 10:11:44 | D | - Calibrating low-rank branch for transformer_blocks.10.attn.to_q, transformer_blocks.10.attn.to_k, transformer_blocks.10.attn.to_v +25-08-31 10:11:44 | D | + w: sfp4_e2m1_all +25-08-31 10:11:44 | D | + x: sfp4_e2m1_all +25-08-31 10:11:44 | D | + y: None +25-08-31 10:11:44 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:11:44 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:11:44 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:11:45 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:12:00 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 10:12:00 | D | - error = [ 2810.4983, 2764.3111, 2714.0617, 2696.8175, 2678.2150, 2671.9908, 2662.8995, 2677.4022] +25-08-31 10:12:00 | D | - best error = [ 2810.4983, 2764.3111, 2714.0617, 2696.8175, 2678.2150, 2671.9908, 2662.8995, 2662.8995] +25-08-31 10:12:01 | D | + Adding low-rank branches to transformer_blocks.10.attn.to_q, transformer_blocks.10.attn.to_k, transformer_blocks.10.attn.to_v +25-08-31 10:12:01 | D | - Calibrating low-rank branch for transformer_blocks.10.attn.add_q_proj, transformer_blocks.10.attn.add_k_proj, transformer_blocks.10.attn.add_v_proj +25-08-31 10:12:01 | D | + w: sfp4_e2m1_all +25-08-31 10:12:01 | D | + x: sfp4_e2m1_all +25-08-31 10:12:01 | D | + y: None +25-08-31 10:12:01 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:12:01 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:12:01 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:12:02 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:12:13 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-31 10:12:13 | D | - error = [ 989.7202, 969.6742, 949.9187, 942.4499, 933.1135, 932.6041, 932.6648] +25-08-31 10:12:13 | D | - best error = [ 989.7202, 969.6742, 949.9187, 942.4499, 933.1135, 932.6041, 932.6041] +25-08-31 10:12:14 | D | + Adding low-rank branches to transformer_blocks.10.attn.add_q_proj, transformer_blocks.10.attn.add_k_proj, transformer_blocks.10.attn.add_v_proj +25-08-31 10:12:14 | D | - Calibrating low-rank branch for transformer_blocks.10.attn.to_out.0 +25-08-31 10:12:14 | D | + w: sfp4_e2m1_all +25-08-31 10:12:14 | D | + x: sfp4_e2m1_all +25-08-31 10:12:14 | D | + y: None +25-08-31 10:12:14 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:12:14 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:12:14 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:12:15 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:12:29 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:12:29 | D | - error = [ 5337.4897, 5282.4731, 5249.0814, 5230.6147, 5207.5825, 5194.6619, 5183.5318, 5170.3435, 5157.8449, 5145.7365] +25-08-31 10:12:29 | D | - best error = [ 5337.4897, 5282.4731, 5249.0814, 5230.6147, 5207.5825, 5194.6619, 5183.5318, 5170.3435, 5157.8449, 5145.7365] +25-08-31 10:12:36 | D | - iter = [ 10, 11, 12, 13, 14] +25-08-31 10:12:36 | D | - error = [ 5137.7639, 5134.8402, 5119.9199, 5113.9441, 5115.2677] +25-08-31 10:12:36 | D | - best error = [ 5137.7639, 5134.8402, 5119.9199, 5113.9441, 5113.9441] +25-08-31 10:12:36 | D | + Adding low-rank branches to transformer_blocks.10.attn.to_out.0 +25-08-31 10:12:37 | D | - Calibrating low-rank branch for transformer_blocks.10.attn.to_add_out +25-08-31 10:12:37 | D | + w: sfp4_e2m1_all +25-08-31 10:12:37 | D | + x: sfp4_e2m1_all +25-08-31 10:12:37 | D | + y: None +25-08-31 10:12:37 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:12:37 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:12:37 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:12:37 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:12:47 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:12:47 | D | - error = [ 1341.7429, 1325.4914, 1315.3367, 1309.3174, 1299.6401, 1294.9511, 1294.4870, 1292.5904, 1289.4448, 1290.1034] +25-08-31 10:12:47 | D | - best error = [ 1341.7429, 1325.4914, 1315.3367, 1309.3174, 1299.6401, 1294.9511, 1294.4870, 1292.5904, 1289.4448, 1289.4448] +25-08-31 10:12:48 | D | + Adding low-rank branches to transformer_blocks.10.attn.to_add_out +25-08-31 10:12:48 | D | - Calibrating low-rank branch for transformer_blocks.10.ff.net.0.proj +25-08-31 10:12:48 | D | + w: sfp4_e2m1_all +25-08-31 10:12:48 | D | + x: sfp4_e2m1_all +25-08-31 10:12:48 | D | + y: None +25-08-31 10:12:48 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:12:48 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:12:48 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:12:49 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:12:58 | D | - iter = [ 0, 1, 2, 3] +25-08-31 10:12:58 | D | - error = [ 5312.2545, 5239.1719, 5192.5508, 5193.2027] +25-08-31 10:12:58 | D | - best error = [ 5312.2545, 5239.1719, 5192.5508, 5192.5508] +25-08-31 10:12:58 | D | + Adding low-rank branches to transformer_blocks.10.ff.net.0.proj +25-08-31 10:12:58 | D | - Calibrating low-rank branch for transformer_blocks.10.ff.net.2 +25-08-31 10:12:58 | D | + w: sfp4_e2m1_all +25-08-31 10:12:58 | D | + x: sfp4_e2m1_all +25-08-31 10:12:58 | D | + y: None +25-08-31 10:12:58 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:12:58 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:12:58 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:13:02 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:13:32 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:13:32 | D | - error = [ 3707.8406, 3687.2233, 3671.0000, 3658.0252, 3648.4675, 3641.6174, 3633.5490, 3630.0850, 3623.2390, 3617.6263] +25-08-31 10:13:32 | D | - best error = [ 3707.8406, 3687.2233, 3671.0000, 3658.0252, 3648.4675, 3641.6174, 3633.5490, 3630.0850, 3623.2390, 3617.6263] +25-08-31 10:14:02 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 10:14:02 | D | - error = [ 3612.8301, 3609.1209, 3606.6583, 3604.1896, 3602.2654, 3599.6664, 3598.1163, 3596.2784, 3594.8313, 3594.7556] +25-08-31 10:14:02 | D | - best error = [ 3612.8301, 3609.1209, 3606.6583, 3604.1896, 3602.2654, 3599.6664, 3598.1163, 3596.2784, 3594.8313, 3594.7556] +25-08-31 10:14:33 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 10:14:33 | D | - error = [ 3593.0129, 3591.5850, 3589.6895, 3588.7654, 3587.5133, 3586.9246, 3586.4637, 3585.6377, 3584.8623, 3584.0247] +25-08-31 10:14:33 | D | - best error = [ 3593.0129, 3591.5850, 3589.6895, 3588.7654, 3587.5133, 3586.9246, 3586.4637, 3585.6377, 3584.8623, 3584.0247] +25-08-31 10:14:51 | D | - iter = [ 30, 31, 32, 33, 34, 35] +25-08-31 10:14:51 | D | - error = [ 3583.3427, 3581.3107, 3580.3228, 3578.7009, 3577.6972, 3578.5668] +25-08-31 10:14:51 | D | - best error = [ 3583.3427, 3581.3107, 3580.3228, 3578.7009, 3577.6972, 3577.6972] +25-08-31 10:14:51 | D | + Adding low-rank branches to transformer_blocks.10.ff.net.2 +25-08-31 10:14:51 | D | - Calibrating low-rank branch for transformer_blocks.10.ff_context.net.0.proj +25-08-31 10:14:51 | D | + w: sfp4_e2m1_all +25-08-31 10:14:51 | D | + x: sfp4_e2m1_all +25-08-31 10:14:51 | D | + y: None +25-08-31 10:14:51 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:14:51 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:14:51 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:14:52 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:15:01 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 10:15:01 | D | - error = [ 1709.7431, 1692.6304, 1683.2046, 1676.4148, 1672.1974, 1667.6566, 1662.9722, 1664.5495] +25-08-31 10:15:01 | D | - best error = [ 1709.7431, 1692.6304, 1683.2046, 1676.4148, 1672.1974, 1667.6566, 1662.9722, 1662.9722] +25-08-31 10:15:01 | D | + Adding low-rank branches to transformer_blocks.10.ff_context.net.0.proj +25-08-31 10:15:01 | D | - Calibrating low-rank branch for transformer_blocks.10.ff_context.net.2 +25-08-31 10:15:01 | D | + w: sfp4_e2m1_all +25-08-31 10:15:01 | D | + x: sfp4_e2m1_all +25-08-31 10:15:01 | D | + y: None +25-08-31 10:15:01 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:15:01 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:15:01 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:15:02 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:15:07 | D | - iter = [ 0, 1, 2, 3] +25-08-31 10:15:07 | D | - error = [ 466.0358, 464.6788, 462.1776, 462.1931] +25-08-31 10:15:07 | D | - best error = [ 466.0358, 464.6788, 462.1776, 462.1776] +25-08-31 10:15:07 | D | + Adding low-rank branches to transformer_blocks.10.ff_context.net.2 +25-08-31 10:15:25 | D | - Calibrating low-rank branches of block transformer_blocks.11 +25-08-31 10:15:25 | D | - Calibrating low-rank branch for transformer_blocks.11.attn.to_q, transformer_blocks.11.attn.to_k, transformer_blocks.11.attn.to_v +25-08-31 10:15:25 | D | + w: sfp4_e2m1_all +25-08-31 10:15:25 | D | + x: sfp4_e2m1_all +25-08-31 10:15:25 | D | + y: None +25-08-31 10:15:25 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:15:25 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:15:25 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:15:26 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:15:35 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-31 10:15:35 | D | - error = [ 2752.6483, 2712.4753, 2697.5176, 2650.4062, 2650.7423] +25-08-31 10:15:35 | D | - best error = [ 2752.6483, 2712.4753, 2697.5176, 2650.4062, 2650.4062] +25-08-31 10:15:36 | D | + Adding low-rank branches to transformer_blocks.11.attn.to_q, transformer_blocks.11.attn.to_k, transformer_blocks.11.attn.to_v +25-08-31 10:15:36 | D | - Calibrating low-rank branch for transformer_blocks.11.attn.add_q_proj, transformer_blocks.11.attn.add_k_proj, transformer_blocks.11.attn.add_v_proj +25-08-31 10:15:36 | D | + w: sfp4_e2m1_all +25-08-31 10:15:36 | D | + x: sfp4_e2m1_all +25-08-31 10:15:36 | D | + y: None +25-08-31 10:15:36 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:15:36 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:15:36 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:15:37 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:15:50 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 10:15:50 | D | - error = [ 1152.7864, 1125.2918, 1109.4423, 1089.5603, 1089.5227, 1084.2141, 1083.5179, 1090.2723] +25-08-31 10:15:50 | D | - best error = [ 1152.7864, 1125.2918, 1109.4423, 1089.5603, 1089.5227, 1084.2141, 1083.5179, 1083.5179] +25-08-31 10:15:50 | D | + Adding low-rank branches to transformer_blocks.11.attn.add_q_proj, transformer_blocks.11.attn.add_k_proj, transformer_blocks.11.attn.add_v_proj +25-08-31 10:15:50 | D | - Calibrating low-rank branch for transformer_blocks.11.attn.to_out.0 +25-08-31 10:15:50 | D | + w: sfp4_e2m1_all +25-08-31 10:15:50 | D | + x: sfp4_e2m1_all +25-08-31 10:15:50 | D | + y: None +25-08-31 10:15:50 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:15:50 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:15:50 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:15:52 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:16:04 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 10:16:04 | D | - error = [ 6701.2796, 6643.4783, 6590.3696, 6570.3522, 6541.3858, 6522.9757, 6497.5832, 6484.9068, 6497.1114] +25-08-31 10:16:04 | D | - best error = [ 6701.2796, 6643.4783, 6590.3696, 6570.3522, 6541.3858, 6522.9757, 6497.5832, 6484.9068, 6484.9068] +25-08-31 10:16:05 | D | + Adding low-rank branches to transformer_blocks.11.attn.to_out.0 +25-08-31 10:16:05 | D | - Calibrating low-rank branch for transformer_blocks.11.attn.to_add_out +25-08-31 10:16:05 | D | + w: sfp4_e2m1_all +25-08-31 10:16:05 | D | + x: sfp4_e2m1_all +25-08-31 10:16:05 | D | + y: None +25-08-31 10:16:05 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:16:05 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:16:05 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:16:05 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:16:14 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 10:16:14 | D | - error = [ 1596.7863, 1584.6511, 1575.2889, 1565.4249, 1556.1700, 1547.8013, 1542.0602, 1539.4929, 1540.9620] +25-08-31 10:16:14 | D | - best error = [ 1596.7863, 1584.6511, 1575.2889, 1565.4249, 1556.1700, 1547.8013, 1542.0602, 1539.4929, 1539.4929] +25-08-31 10:16:15 | D | + Adding low-rank branches to transformer_blocks.11.attn.to_add_out +25-08-31 10:16:15 | D | - Calibrating low-rank branch for transformer_blocks.11.ff.net.0.proj +25-08-31 10:16:15 | D | + w: sfp4_e2m1_all +25-08-31 10:16:15 | D | + x: sfp4_e2m1_all +25-08-31 10:16:15 | D | + y: None +25-08-31 10:16:15 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:16:15 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:16:15 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:16:16 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:16:34 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 10:16:34 | D | - error = [ 5349.6787, 5291.0696, 5233.3892, 5193.2283, 5169.8611, 5152.9397, 5136.7492, 5123.4203, 5152.4062] +25-08-31 10:16:34 | D | - best error = [ 5349.6787, 5291.0696, 5233.3892, 5193.2283, 5169.8611, 5152.9397, 5136.7492, 5123.4203, 5123.4203] +25-08-31 10:16:35 | D | + Adding low-rank branches to transformer_blocks.11.ff.net.0.proj +25-08-31 10:16:35 | D | - Calibrating low-rank branch for transformer_blocks.11.ff.net.2 +25-08-31 10:16:35 | D | + w: sfp4_e2m1_all +25-08-31 10:16:35 | D | + x: sfp4_e2m1_all +25-08-31 10:16:35 | D | + y: None +25-08-31 10:16:35 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:16:35 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:16:35 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:16:39 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:17:08 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:17:08 | D | - error = [ 4368.4569, 4341.0144, 4324.4745, 4308.8811, 4298.1903, 4287.7533, 4281.6508, 4276.4932, 4270.9804, 4266.7137] +25-08-31 10:17:09 | D | - best error = [ 4368.4569, 4341.0144, 4324.4745, 4308.8811, 4298.1903, 4287.7533, 4281.6508, 4276.4932, 4270.9804, 4266.7137] +25-08-31 10:17:39 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 10:17:39 | D | - error = [ 4262.3496, 4259.5401, 4254.4739, 4249.8289, 4246.3102, 4243.2879, 4239.9486, 4238.9352, 4235.1581, 4233.0296] +25-08-31 10:17:39 | D | - best error = [ 4262.3496, 4259.5401, 4254.4739, 4249.8289, 4246.3102, 4243.2879, 4239.9486, 4238.9352, 4235.1581, 4233.0296] +25-08-31 10:18:08 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 10:18:08 | D | - error = [ 4231.2100, 4229.7094, 4229.0826, 4227.6360, 4227.1385, 4225.6417, 4223.6614, 4221.5445, 4221.0584, 4219.8486] +25-08-31 10:18:08 | D | - best error = [ 4231.2100, 4229.7094, 4229.0826, 4227.6360, 4227.1385, 4225.6417, 4223.6614, 4221.5445, 4221.0584, 4219.8486] +25-08-31 10:18:38 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-31 10:18:38 | D | - error = [ 4218.3890, 4217.6674, 4217.2515, 4216.8659, 4215.6791, 4213.7596, 4213.2686, 4212.9980, 4212.4261, 4211.4197] +25-08-31 10:18:38 | D | - best error = [ 4218.3890, 4217.6674, 4217.2515, 4216.8659, 4215.6791, 4213.7596, 4213.2686, 4212.9980, 4212.4261, 4211.4197] +25-08-31 10:18:50 | D | - iter = [ 40, 41, 42, 43] +25-08-31 10:18:50 | D | - error = [ 4211.0999, 4210.1233, 4209.3123, 4209.3204] +25-08-31 10:18:50 | D | - best error = [ 4211.0999, 4210.1233, 4209.3123, 4209.3123] +25-08-31 10:18:51 | D | + Adding low-rank branches to transformer_blocks.11.ff.net.2 +25-08-31 10:18:51 | D | - Calibrating low-rank branch for transformer_blocks.11.ff_context.net.0.proj +25-08-31 10:18:51 | D | + w: sfp4_e2m1_all +25-08-31 10:18:51 | D | + x: sfp4_e2m1_all +25-08-31 10:18:51 | D | + y: None +25-08-31 10:18:51 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:18:51 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:18:51 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:18:51 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:18:57 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-31 10:18:57 | D | - error = [ 1304.8511, 1266.5062, 1240.3120, 1223.4589, 1238.3154] +25-08-31 10:18:57 | D | - best error = [ 1304.8511, 1266.5062, 1240.3120, 1223.4589, 1223.4589] +25-08-31 10:18:57 | D | + Adding low-rank branches to transformer_blocks.11.ff_context.net.0.proj +25-08-31 10:18:57 | D | - Calibrating low-rank branch for transformer_blocks.11.ff_context.net.2 +25-08-31 10:18:57 | D | + w: sfp4_e2m1_all +25-08-31 10:18:57 | D | + x: sfp4_e2m1_all +25-08-31 10:18:57 | D | + y: None +25-08-31 10:18:57 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:18:57 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:18:57 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:18:58 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:19:11 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:19:11 | D | - error = [ 341.3737, 338.7037, 335.8428, 334.8644, 333.5880, 332.5350, 331.8853, 331.3576, 330.2803, 329.8256] +25-08-31 10:19:11 | D | - best error = [ 341.3737, 338.7037, 335.8428, 334.8644, 333.5880, 332.5350, 331.8853, 331.3576, 330.2803, 329.8256] +25-08-31 10:19:17 | D | - iter = [ 10, 11, 12, 13, 14] +25-08-31 10:19:17 | D | - error = [ 328.9332, 328.0347, 327.6012, 327.3901, 327.9807] +25-08-31 10:19:17 | D | - best error = [ 328.9332, 328.0347, 327.6012, 327.3901, 327.3901] +25-08-31 10:19:17 | D | + Adding low-rank branches to transformer_blocks.11.ff_context.net.2 +25-08-31 10:19:35 | D | - Calibrating low-rank branches of block transformer_blocks.12 +25-08-31 10:19:35 | D | - Calibrating low-rank branch for transformer_blocks.12.attn.to_q, transformer_blocks.12.attn.to_k, transformer_blocks.12.attn.to_v +25-08-31 10:19:35 | D | + w: sfp4_e2m1_all +25-08-31 10:19:35 | D | + x: sfp4_e2m1_all +25-08-31 10:19:35 | D | + y: None +25-08-31 10:19:35 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:19:35 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:19:35 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:19:36 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:19:55 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:19:55 | D | - error = [ 3404.3641, 3348.9267, 3322.9008, 3290.0518, 3282.8098, 3261.8012, 3245.0208, 3243.2643, 3231.9035, 3235.2049] +25-08-31 10:19:55 | D | - best error = [ 3404.3641, 3348.9267, 3322.9008, 3290.0518, 3282.8098, 3261.8012, 3245.0208, 3243.2643, 3231.9035, 3231.9035] +25-08-31 10:19:56 | D | + Adding low-rank branches to transformer_blocks.12.attn.to_q, transformer_blocks.12.attn.to_k, transformer_blocks.12.attn.to_v +25-08-31 10:19:56 | D | - Calibrating low-rank branch for transformer_blocks.12.attn.add_q_proj, transformer_blocks.12.attn.add_k_proj, transformer_blocks.12.attn.add_v_proj +25-08-31 10:19:56 | D | + w: sfp4_e2m1_all +25-08-31 10:19:56 | D | + x: sfp4_e2m1_all +25-08-31 10:19:56 | D | + y: None +25-08-31 10:19:56 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:19:56 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:19:56 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:19:57 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:20:11 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 10:20:11 | D | - error = [ 1101.5773, 1095.2911, 1067.5911, 1049.7621, 1040.5679, 1024.1826, 1019.3487, 1020.7244] +25-08-31 10:20:11 | D | - best error = [ 1101.5773, 1095.2911, 1067.5911, 1049.7621, 1040.5679, 1024.1826, 1019.3487, 1019.3487] +25-08-31 10:20:11 | D | + Adding low-rank branches to transformer_blocks.12.attn.add_q_proj, transformer_blocks.12.attn.add_k_proj, transformer_blocks.12.attn.add_v_proj +25-08-31 10:20:11 | D | - Calibrating low-rank branch for transformer_blocks.12.attn.to_out.0 +25-08-31 10:20:11 | D | + w: sfp4_e2m1_all +25-08-31 10:20:11 | D | + x: sfp4_e2m1_all +25-08-31 10:20:11 | D | + y: None +25-08-31 10:20:11 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:20:11 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:20:11 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:20:13 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:20:27 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:20:27 | D | - error = [ 9009.8364, 8931.9794, 8874.3000, 8823.6845, 8800.6877, 8771.5960, 8754.2214, 8740.2764, 8725.9012, 8713.8786] +25-08-31 10:20:27 | D | - best error = [ 9009.8364, 8931.9794, 8874.3000, 8823.6845, 8800.6877, 8771.5960, 8754.2214, 8740.2764, 8725.9012, 8713.8786] +25-08-31 10:20:41 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18] +25-08-31 10:20:41 | D | - error = [ 8700.0589, 8694.5164, 8682.2264, 8672.2500, 8662.1614, 8658.4613, 8650.3563, 8644.6562, 8662.3313] +25-08-31 10:20:41 | D | - best error = [ 8700.0589, 8694.5164, 8682.2264, 8672.2500, 8662.1614, 8658.4613, 8650.3563, 8644.6562, 8644.6562] +25-08-31 10:20:41 | D | + Adding low-rank branches to transformer_blocks.12.attn.to_out.0 +25-08-31 10:20:41 | D | - Calibrating low-rank branch for transformer_blocks.12.attn.to_add_out +25-08-31 10:20:41 | D | + w: sfp4_e2m1_all +25-08-31 10:20:41 | D | + x: sfp4_e2m1_all +25-08-31 10:20:41 | D | + y: None +25-08-31 10:20:41 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:20:41 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:20:41 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:20:42 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:20:52 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:20:52 | D | - error = [ 2034.0004, 2010.8645, 1997.2264, 1987.7219, 1984.1810, 1975.9087, 1967.0968, 1963.3975, 1953.6781, 1952.0648] +25-08-31 10:20:52 | D | - best error = [ 2034.0004, 2010.8645, 1997.2264, 1987.7219, 1984.1810, 1975.9087, 1967.0968, 1963.3975, 1953.6781, 1952.0648] +25-08-31 10:20:55 | D | - iter = [ 10, 11, 12] +25-08-31 10:20:55 | D | - error = [ 1949.5130, 1944.1565, 1950.4303] +25-08-31 10:20:55 | D | - best error = [ 1949.5130, 1944.1565, 1944.1565] +25-08-31 10:20:55 | D | + Adding low-rank branches to transformer_blocks.12.attn.to_add_out +25-08-31 10:20:55 | D | - Calibrating low-rank branch for transformer_blocks.12.ff.net.0.proj +25-08-31 10:20:55 | D | + w: sfp4_e2m1_all +25-08-31 10:20:55 | D | + x: sfp4_e2m1_all +25-08-31 10:20:55 | D | + y: None +25-08-31 10:20:55 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:20:55 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:20:55 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:20:57 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:21:17 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:21:17 | D | - error = [ 5296.1741, 5231.6330, 5190.8897, 5158.2402, 5138.0580, 5118.6692, 5105.8609, 5093.7562, 5084.9720, 5074.6080] +25-08-31 10:21:17 | D | - best error = [ 5296.1741, 5231.6330, 5190.8897, 5158.2402, 5138.0580, 5118.6692, 5105.8609, 5093.7562, 5084.9720, 5074.6080] +25-08-31 10:21:19 | D | - iter = [ 10] +25-08-31 10:21:19 | D | - error = [ 5085.1509] +25-08-31 10:21:19 | D | - best error = [ 5074.6080] +25-08-31 10:21:20 | D | + Adding low-rank branches to transformer_blocks.12.ff.net.0.proj +25-08-31 10:21:20 | D | - Calibrating low-rank branch for transformer_blocks.12.ff.net.2 +25-08-31 10:21:20 | D | + w: sfp4_e2m1_all +25-08-31 10:21:20 | D | + x: sfp4_e2m1_all +25-08-31 10:21:20 | D | + y: None +25-08-31 10:21:20 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:21:20 | D | + finished parsing calibration arguments, ram usage: 14.7 +25-08-31 10:21:20 | D | + finished resetting calibrator, ram usage: 14.7 +25-08-31 10:21:24 | D | + finished calculating the original outputs, ram usage: 14.7 +25-08-31 10:21:54 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:21:54 | D | - error = [ 4788.3555, 4759.9674, 4739.6443, 4723.6105, 4712.7788, 4701.9609, 4691.9915, 4683.8858, 4678.3497, 4672.3324] +25-08-31 10:21:54 | D | - best error = [ 4788.3555, 4759.9674, 4739.6443, 4723.6105, 4712.7788, 4701.9609, 4691.9915, 4683.8858, 4678.3497, 4672.3324] +25-08-31 10:22:24 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 10:22:24 | D | - error = [ 4667.4091, 4664.5611, 4659.4274, 4654.7780, 4651.9875, 4650.3727, 4647.8812, 4644.5878, 4640.2489, 4637.9875] +25-08-31 10:22:24 | D | - best error = [ 4667.4091, 4664.5611, 4659.4274, 4654.7780, 4651.9875, 4650.3727, 4647.8812, 4644.5878, 4640.2489, 4637.9875] +25-08-31 10:22:51 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28] +25-08-31 10:22:51 | D | - error = [ 4636.8705, 4635.6339, 4635.0040, 4634.0924, 4632.7638, 4632.1075, 4630.7475, 4629.2012, 4629.2546] +25-08-31 10:22:51 | D | - best error = [ 4636.8705, 4635.6339, 4635.0040, 4634.0924, 4632.7638, 4632.1075, 4630.7475, 4629.2012, 4629.2012] +25-08-31 10:22:51 | D | + Adding low-rank branches to transformer_blocks.12.ff.net.2 +25-08-31 10:22:51 | D | - Calibrating low-rank branch for transformer_blocks.12.ff_context.net.0.proj +25-08-31 10:22:51 | D | + w: sfp4_e2m1_all +25-08-31 10:22:51 | D | + x: sfp4_e2m1_all +25-08-31 10:22:51 | D | + y: None +25-08-31 10:22:51 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:22:51 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:22:51 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:22:52 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:22:59 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-31 10:22:59 | D | - error = [ 1186.7091, 1180.0289, 1178.0812, 1174.1745, 1171.8348, 1172.0119] +25-08-31 10:22:59 | D | - best error = [ 1186.7091, 1180.0289, 1178.0812, 1174.1745, 1171.8348, 1171.8348] +25-08-31 10:22:59 | D | + Adding low-rank branches to transformer_blocks.12.ff_context.net.0.proj +25-08-31 10:22:59 | D | - Calibrating low-rank branch for transformer_blocks.12.ff_context.net.2 +25-08-31 10:22:59 | D | + w: sfp4_e2m1_all +25-08-31 10:22:59 | D | + x: sfp4_e2m1_all +25-08-31 10:22:59 | D | + y: None +25-08-31 10:22:59 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:22:59 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:22:59 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:23:00 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:23:11 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 10:23:11 | D | - error = [ 382.4460, 380.6193, 378.6027, 377.5574, 377.0402, 376.8012, 376.3639, 376.3178, 377.3595] +25-08-31 10:23:11 | D | - best error = [ 382.4460, 380.6193, 378.6027, 377.5574, 377.0402, 376.8012, 376.3639, 376.3178, 376.3178] +25-08-31 10:23:11 | D | + Adding low-rank branches to transformer_blocks.12.ff_context.net.2 +25-08-31 10:23:29 | D | - Calibrating low-rank branches of block transformer_blocks.13 +25-08-31 10:23:29 | D | - Calibrating low-rank branch for transformer_blocks.13.attn.to_q, transformer_blocks.13.attn.to_k, transformer_blocks.13.attn.to_v +25-08-31 10:23:29 | D | + w: sfp4_e2m1_all +25-08-31 10:23:29 | D | + x: sfp4_e2m1_all +25-08-31 10:23:29 | D | + y: None +25-08-31 10:23:29 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:23:29 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:23:29 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:23:30 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:23:49 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:23:49 | D | - error = [ 3549.8831, 3476.3210, 3463.1711, 3445.2810, 3408.1806, 3401.7269, 3377.6945, 3364.2091, 3363.0056, 3349.4477] +25-08-31 10:23:49 | D | - best error = [ 3549.8831, 3476.3210, 3463.1711, 3445.2810, 3408.1806, 3401.7269, 3377.6945, 3364.2091, 3363.0056, 3349.4477] +25-08-31 10:23:53 | D | - iter = [ 10, 11] +25-08-31 10:23:53 | D | - error = [ 3346.6506, 3347.8156] +25-08-31 10:23:53 | D | - best error = [ 3346.6506, 3346.6506] +25-08-31 10:23:53 | D | + Adding low-rank branches to transformer_blocks.13.attn.to_q, transformer_blocks.13.attn.to_k, transformer_blocks.13.attn.to_v +25-08-31 10:23:53 | D | - Calibrating low-rank branch for transformer_blocks.13.attn.add_q_proj, transformer_blocks.13.attn.add_k_proj, transformer_blocks.13.attn.add_v_proj +25-08-31 10:23:53 | D | + w: sfp4_e2m1_all +25-08-31 10:23:53 | D | + x: sfp4_e2m1_all +25-08-31 10:23:53 | D | + y: None +25-08-31 10:23:53 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:23:53 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:23:53 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:23:54 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:24:02 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-31 10:24:02 | D | - error = [ 712.2509, 682.8934, 681.8349, 669.2842, 670.3338] +25-08-31 10:24:02 | D | - best error = [ 712.2509, 682.8934, 681.8349, 669.2842, 669.2842] +25-08-31 10:24:03 | D | + Adding low-rank branches to transformer_blocks.13.attn.add_q_proj, transformer_blocks.13.attn.add_k_proj, transformer_blocks.13.attn.add_v_proj +25-08-31 10:24:03 | D | - Calibrating low-rank branch for transformer_blocks.13.attn.to_out.0 +25-08-31 10:24:03 | D | + w: sfp4_e2m1_all +25-08-31 10:24:03 | D | + x: sfp4_e2m1_all +25-08-31 10:24:03 | D | + y: None +25-08-31 10:24:03 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:24:03 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:24:03 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:24:04 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:24:17 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 10:24:17 | D | - error = [ 3900.0274, 3865.8032, 3841.0290, 3823.6008, 3810.4455, 3801.2398, 3792.9668, 3788.4551, 3788.7862] +25-08-31 10:24:17 | D | - best error = [ 3900.0274, 3865.8032, 3841.0290, 3823.6008, 3810.4455, 3801.2398, 3792.9668, 3788.4551, 3788.4551] +25-08-31 10:24:18 | D | + Adding low-rank branches to transformer_blocks.13.attn.to_out.0 +25-08-31 10:24:18 | D | - Calibrating low-rank branch for transformer_blocks.13.attn.to_add_out +25-08-31 10:24:18 | D | + w: sfp4_e2m1_all +25-08-31 10:24:18 | D | + x: sfp4_e2m1_all +25-08-31 10:24:18 | D | + y: None +25-08-31 10:24:18 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:24:18 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:24:18 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:24:18 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:24:28 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:24:28 | D | - error = [ 759.6412, 750.8111, 745.4548, 742.1030, 738.9142, 738.5592, 736.7522, 735.3849, 734.2080, 734.8947] +25-08-31 10:24:28 | D | - best error = [ 759.6412, 750.8111, 745.4548, 742.1030, 738.9142, 738.5592, 736.7522, 735.3849, 734.2080, 734.2080] +25-08-31 10:24:29 | D | + Adding low-rank branches to transformer_blocks.13.attn.to_add_out +25-08-31 10:24:29 | D | - Calibrating low-rank branch for transformer_blocks.13.ff.net.0.proj +25-08-31 10:24:29 | D | + w: sfp4_e2m1_all +25-08-31 10:24:29 | D | + x: sfp4_e2m1_all +25-08-31 10:24:29 | D | + y: None +25-08-31 10:24:29 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:24:29 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:24:29 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:24:30 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:24:48 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 10:24:48 | D | - error = [ 5250.3699, 5191.3291, 5152.6494, 5122.4580, 5101.2912, 5084.4715, 5071.0573, 5060.1097, 5068.7444] +25-08-31 10:24:48 | D | - best error = [ 5250.3699, 5191.3291, 5152.6494, 5122.4580, 5101.2912, 5084.4715, 5071.0573, 5060.1097, 5060.1097] +25-08-31 10:24:48 | D | + Adding low-rank branches to transformer_blocks.13.ff.net.0.proj +25-08-31 10:24:49 | D | - Calibrating low-rank branch for transformer_blocks.13.ff.net.2 +25-08-31 10:24:49 | D | + w: sfp4_e2m1_all +25-08-31 10:24:49 | D | + x: sfp4_e2m1_all +25-08-31 10:24:49 | D | + y: None +25-08-31 10:24:49 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:24:49 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:24:49 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:24:53 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:25:21 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:25:21 | D | - error = [ 6360.4676, 6327.9125, 6300.8669, 6283.6413, 6266.6032, 6253.7512, 6241.8677, 6231.6251, 6223.0144, 6214.8171] +25-08-31 10:25:21 | D | - best error = [ 6360.4676, 6327.9125, 6300.8669, 6283.6413, 6266.6032, 6253.7512, 6241.8677, 6231.6251, 6223.0144, 6214.8171] +25-08-31 10:25:52 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 10:25:52 | D | - error = [ 6206.9574, 6199.9452, 6196.1543, 6192.6389, 6188.1067, 6184.6261, 6182.9660, 6177.2648, 6173.4201, 6171.0965] +25-08-31 10:25:52 | D | - best error = [ 6206.9574, 6199.9452, 6196.1543, 6192.6389, 6188.1067, 6184.6261, 6182.9660, 6177.2648, 6173.4201, 6171.0965] +25-08-31 10:26:15 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27] +25-08-31 10:26:15 | D | - error = [ 6167.2120, 6165.6698, 6165.2192, 6164.4685, 6164.2482, 6161.4213, 6159.3899, 6177.2525] +25-08-31 10:26:15 | D | - best error = [ 6167.2120, 6165.6698, 6165.2192, 6164.4685, 6164.2482, 6161.4213, 6159.3899, 6159.3899] +25-08-31 10:26:15 | D | + Adding low-rank branches to transformer_blocks.13.ff.net.2 +25-08-31 10:26:15 | D | - Calibrating low-rank branch for transformer_blocks.13.ff_context.net.0.proj +25-08-31 10:26:15 | D | + w: sfp4_e2m1_all +25-08-31 10:26:15 | D | + x: sfp4_e2m1_all +25-08-31 10:26:15 | D | + y: None +25-08-31 10:26:15 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:26:15 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:26:15 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:26:16 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:26:27 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:26:27 | D | - error = [ 811.9035, 804.4664, 803.9133, 801.0966, 799.1069, 798.7342, 796.4379, 793.3232, 793.1705, 793.6673] +25-08-31 10:26:27 | D | - best error = [ 811.9035, 804.4664, 803.9133, 801.0966, 799.1069, 798.7342, 796.4379, 793.3232, 793.1705, 793.1705] +25-08-31 10:26:27 | D | + Adding low-rank branches to transformer_blocks.13.ff_context.net.0.proj +25-08-31 10:26:28 | D | - Calibrating low-rank branch for transformer_blocks.13.ff_context.net.2 +25-08-31 10:26:28 | D | + w: sfp4_e2m1_all +25-08-31 10:26:28 | D | + x: sfp4_e2m1_all +25-08-31 10:26:28 | D | + y: None +25-08-31 10:26:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:26:28 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:26:28 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:26:28 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:26:40 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 10:26:40 | D | - error = [ 240.7962, 239.5385, 238.3525, 237.5129, 236.8415, 236.6547, 236.2913, 236.1880, 236.3207] +25-08-31 10:26:40 | D | - best error = [ 240.7962, 239.5385, 238.3525, 237.5129, 236.8415, 236.6547, 236.2913, 236.1880, 236.1880] +25-08-31 10:26:40 | D | + Adding low-rank branches to transformer_blocks.13.ff_context.net.2 +25-08-31 10:26:58 | D | - Calibrating low-rank branches of block transformer_blocks.14 +25-08-31 10:26:58 | D | - Calibrating low-rank branch for transformer_blocks.14.attn.to_q, transformer_blocks.14.attn.to_k, transformer_blocks.14.attn.to_v +25-08-31 10:26:58 | D | + w: sfp4_e2m1_all +25-08-31 10:26:58 | D | + x: sfp4_e2m1_all +25-08-31 10:26:58 | D | + y: None +25-08-31 10:26:58 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:26:58 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:26:58 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:26:59 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:27:18 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:27:18 | D | - error = [ 2887.3942, 2839.7257, 2799.1626, 2770.0857, 2751.1135, 2737.6364, 2733.6959, 2728.2587, 2724.0011, 2722.7825] +25-08-31 10:27:18 | D | - best error = [ 2887.3942, 2839.7257, 2799.1626, 2770.0857, 2751.1135, 2737.6364, 2733.6959, 2728.2587, 2724.0011, 2722.7825] +25-08-31 10:27:20 | D | - iter = [ 10] +25-08-31 10:27:20 | D | - error = [ 2724.6792] +25-08-31 10:27:20 | D | - best error = [ 2722.7825] +25-08-31 10:27:20 | D | + Adding low-rank branches to transformer_blocks.14.attn.to_q, transformer_blocks.14.attn.to_k, transformer_blocks.14.attn.to_v +25-08-31 10:27:20 | D | - Calibrating low-rank branch for transformer_blocks.14.attn.add_q_proj, transformer_blocks.14.attn.add_k_proj, transformer_blocks.14.attn.add_v_proj +25-08-31 10:27:20 | D | + w: sfp4_e2m1_all +25-08-31 10:27:20 | D | + x: sfp4_e2m1_all +25-08-31 10:27:20 | D | + y: None +25-08-31 10:27:20 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:27:20 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:27:20 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:27:21 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:27:32 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-31 10:27:32 | D | - error = [ 966.2852, 923.4769, 914.5662, 908.8360, 904.2197, 901.4688, 907.5060] +25-08-31 10:27:32 | D | - best error = [ 966.2852, 923.4769, 914.5662, 908.8360, 904.2197, 901.4688, 901.4688] +25-08-31 10:27:33 | D | + Adding low-rank branches to transformer_blocks.14.attn.add_q_proj, transformer_blocks.14.attn.add_k_proj, transformer_blocks.14.attn.add_v_proj +25-08-31 10:27:33 | D | - Calibrating low-rank branch for transformer_blocks.14.attn.to_out.0 +25-08-31 10:27:33 | D | + w: sfp4_e2m1_all +25-08-31 10:27:33 | D | + x: sfp4_e2m1_all +25-08-31 10:27:33 | D | + y: None +25-08-31 10:27:33 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:27:33 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:27:33 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:27:34 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:27:47 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 10:27:47 | D | - error = [12113.4755, 12011.4579, 11931.0466, 11889.2139, 11835.3836, 11795.9769, 11767.7879, 11738.8211, 11757.4650] +25-08-31 10:27:47 | D | - best error = [12113.4755, 12011.4579, 11931.0466, 11889.2139, 11835.3836, 11795.9769, 11767.7879, 11738.8211, 11738.8211] +25-08-31 10:27:47 | D | + Adding low-rank branches to transformer_blocks.14.attn.to_out.0 +25-08-31 10:27:47 | D | - Calibrating low-rank branch for transformer_blocks.14.attn.to_add_out +25-08-31 10:27:47 | D | + w: sfp4_e2m1_all +25-08-31 10:27:47 | D | + x: sfp4_e2m1_all +25-08-31 10:27:47 | D | + y: None +25-08-31 10:27:47 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:27:47 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:27:47 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:27:48 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:27:56 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 10:27:56 | D | - error = [ 2242.7203, 2220.9311, 2204.1792, 2189.5339, 2180.0297, 2173.0959, 2171.8692, 2172.9693] +25-08-31 10:27:56 | D | - best error = [ 2242.7203, 2220.9311, 2204.1792, 2189.5339, 2180.0297, 2173.0959, 2171.8692, 2171.8692] +25-08-31 10:27:56 | D | + Adding low-rank branches to transformer_blocks.14.attn.to_add_out +25-08-31 10:27:56 | D | - Calibrating low-rank branch for transformer_blocks.14.ff.net.0.proj +25-08-31 10:27:56 | D | + w: sfp4_e2m1_all +25-08-31 10:27:56 | D | + x: sfp4_e2m1_all +25-08-31 10:27:56 | D | + y: None +25-08-31 10:27:56 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:27:56 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:27:56 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:27:58 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:28:12 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-31 10:28:12 | D | - error = [ 5425.4334, 5371.9592, 5326.5302, 5286.8885, 5261.0887, 5242.4700, 5244.5358] +25-08-31 10:28:12 | D | - best error = [ 5425.4334, 5371.9592, 5326.5302, 5286.8885, 5261.0887, 5242.4700, 5242.4700] +25-08-31 10:28:12 | D | + Adding low-rank branches to transformer_blocks.14.ff.net.0.proj +25-08-31 10:28:12 | D | - Calibrating low-rank branch for transformer_blocks.14.ff.net.2 +25-08-31 10:28:12 | D | + w: sfp4_e2m1_all +25-08-31 10:28:12 | D | + x: sfp4_e2m1_all +25-08-31 10:28:12 | D | + y: None +25-08-31 10:28:12 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:28:12 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:28:12 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:28:17 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:28:46 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:28:46 | D | - error = [ 5402.1979, 5370.6588, 5346.2810, 5329.2998, 5317.1959, 5301.0320, 5294.2187, 5288.2937, 5281.0792, 5276.8337] +25-08-31 10:28:46 | D | - best error = [ 5402.1979, 5370.6588, 5346.2810, 5329.2998, 5317.1959, 5301.0320, 5294.2187, 5288.2937, 5281.0792, 5276.8337] +25-08-31 10:29:14 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18] +25-08-31 10:29:14 | D | - error = [ 5271.5091, 5263.6924, 5260.4418, 5256.8300, 5253.1537, 5250.4804, 5246.9462, 5245.4045, 5257.8526] +25-08-31 10:29:14 | D | - best error = [ 5271.5091, 5263.6924, 5260.4418, 5256.8300, 5253.1537, 5250.4804, 5246.9462, 5245.4045, 5245.4045] +25-08-31 10:29:14 | D | + Adding low-rank branches to transformer_blocks.14.ff.net.2 +25-08-31 10:29:14 | D | - Calibrating low-rank branch for transformer_blocks.14.ff_context.net.0.proj +25-08-31 10:29:14 | D | + w: sfp4_e2m1_all +25-08-31 10:29:14 | D | + x: sfp4_e2m1_all +25-08-31 10:29:14 | D | + y: None +25-08-31 10:29:14 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:29:14 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:29:14 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:29:15 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:29:26 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:29:26 | D | - error = [ 1346.9981, 1342.1516, 1332.6932, 1327.0157, 1323.7940, 1320.7590, 1318.7138, 1318.3059, 1317.2607, 1314.1630] +25-08-31 10:29:26 | D | - best error = [ 1346.9981, 1342.1516, 1332.6932, 1327.0157, 1323.7940, 1320.7590, 1318.7138, 1318.3059, 1317.2607, 1314.1630] +25-08-31 10:29:28 | D | - iter = [ 10, 11] +25-08-31 10:29:28 | D | - error = [ 1314.0705, 1314.8328] +25-08-31 10:29:28 | D | - best error = [ 1314.0705, 1314.0705] +25-08-31 10:29:28 | D | + Adding low-rank branches to transformer_blocks.14.ff_context.net.0.proj +25-08-31 10:29:28 | D | - Calibrating low-rank branch for transformer_blocks.14.ff_context.net.2 +25-08-31 10:29:28 | D | + w: sfp4_e2m1_all +25-08-31 10:29:28 | D | + x: sfp4_e2m1_all +25-08-31 10:29:28 | D | + y: None +25-08-31 10:29:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:29:28 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:29:28 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:29:29 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:29:42 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:29:42 | D | - error = [ 560.1264, 555.4988, 553.8914, 553.5807, 551.7851, 551.4179, 551.2701, 550.5277, 549.5270, 549.9853] +25-08-31 10:29:42 | D | - best error = [ 560.1264, 555.4988, 553.8914, 553.5807, 551.7851, 551.4179, 551.2701, 550.5277, 549.5270, 549.5270] +25-08-31 10:29:42 | D | + Adding low-rank branches to transformer_blocks.14.ff_context.net.2 +25-08-31 10:30:00 | D | - Calibrating low-rank branches of block transformer_blocks.15 +25-08-31 10:30:00 | D | - Calibrating low-rank branch for transformer_blocks.15.attn.to_q, transformer_blocks.15.attn.to_k, transformer_blocks.15.attn.to_v +25-08-31 10:30:00 | D | + w: sfp4_e2m1_all +25-08-31 10:30:00 | D | + x: sfp4_e2m1_all +25-08-31 10:30:00 | D | + y: None +25-08-31 10:30:00 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:30:00 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:30:00 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:30:01 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:30:16 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 10:30:16 | D | - error = [ 3113.1950, 3045.8242, 2977.0852, 2911.3751, 2873.9367, 2846.5540, 2817.2183, 2820.8708] +25-08-31 10:30:16 | D | - best error = [ 3113.1950, 3045.8242, 2977.0852, 2911.3751, 2873.9367, 2846.5540, 2817.2183, 2817.2183] +25-08-31 10:30:16 | D | + Adding low-rank branches to transformer_blocks.15.attn.to_q, transformer_blocks.15.attn.to_k, transformer_blocks.15.attn.to_v +25-08-31 10:30:17 | D | - Calibrating low-rank branch for transformer_blocks.15.attn.add_q_proj, transformer_blocks.15.attn.add_k_proj, transformer_blocks.15.attn.add_v_proj +25-08-31 10:30:17 | D | + w: sfp4_e2m1_all +25-08-31 10:30:17 | D | + x: sfp4_e2m1_all +25-08-31 10:30:17 | D | + y: None +25-08-31 10:30:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:30:17 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:30:17 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:30:18 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:30:24 | D | - iter = [ 0, 1, 2, 3] +25-08-31 10:30:24 | D | - error = [ 951.1566, 909.0204, 888.4597, 898.1627] +25-08-31 10:30:24 | D | - best error = [ 951.1566, 909.0204, 888.4597, 888.4597] +25-08-31 10:30:24 | D | + Adding low-rank branches to transformer_blocks.15.attn.add_q_proj, transformer_blocks.15.attn.add_k_proj, transformer_blocks.15.attn.add_v_proj +25-08-31 10:30:25 | D | - Calibrating low-rank branch for transformer_blocks.15.attn.to_out.0 +25-08-31 10:30:25 | D | + w: sfp4_e2m1_all +25-08-31 10:30:25 | D | + x: sfp4_e2m1_all +25-08-31 10:30:25 | D | + y: None +25-08-31 10:30:25 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:30:25 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:30:25 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:30:26 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:30:40 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:30:40 | D | - error = [ 7062.5800, 7003.2710, 6957.5708, 6922.7835, 6891.1758, 6873.0957, 6860.3434, 6843.4915, 6829.8214, 6818.5705] +25-08-31 10:30:40 | D | - best error = [ 7062.5800, 7003.2710, 6957.5708, 6922.7835, 6891.1758, 6873.0957, 6860.3434, 6843.4915, 6829.8214, 6818.5705] +25-08-31 10:30:55 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 10:30:55 | D | - error = [ 6810.4250, 6802.1799, 6795.6074, 6790.2059, 6783.1034, 6779.4607, 6777.0519, 6773.4925, 6767.8759, 6766.2353] +25-08-31 10:30:55 | D | - best error = [ 6810.4250, 6802.1799, 6795.6074, 6790.2059, 6783.1034, 6779.4607, 6777.0519, 6773.4925, 6767.8759, 6766.2353] +25-08-31 10:30:57 | D | - iter = [ 20, 21] +25-08-31 10:30:57 | D | - error = [ 6763.2362, 6771.8210] +25-08-31 10:30:57 | D | - best error = [ 6763.2362, 6763.2362] +25-08-31 10:30:58 | D | + Adding low-rank branches to transformer_blocks.15.attn.to_out.0 +25-08-31 10:30:58 | D | - Calibrating low-rank branch for transformer_blocks.15.attn.to_add_out +25-08-31 10:30:58 | D | + w: sfp4_e2m1_all +25-08-31 10:30:58 | D | + x: sfp4_e2m1_all +25-08-31 10:30:58 | D | + y: None +25-08-31 10:30:58 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:30:58 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:30:58 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:30:58 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:31:07 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 10:31:07 | D | - error = [ 1130.1970, 1115.2125, 1102.9904, 1097.6768, 1092.0225, 1089.5805, 1084.2773, 1085.8164] +25-08-31 10:31:07 | D | - best error = [ 1130.1970, 1115.2125, 1102.9904, 1097.6768, 1092.0225, 1089.5805, 1084.2773, 1084.2773] +25-08-31 10:31:07 | D | + Adding low-rank branches to transformer_blocks.15.attn.to_add_out +25-08-31 10:31:07 | D | - Calibrating low-rank branch for transformer_blocks.15.ff.net.0.proj +25-08-31 10:31:07 | D | + w: sfp4_e2m1_all +25-08-31 10:31:07 | D | + x: sfp4_e2m1_all +25-08-31 10:31:07 | D | + y: None +25-08-31 10:31:07 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:31:07 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:31:07 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:31:09 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:31:29 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:31:29 | D | - error = [ 5043.6816, 4986.6460, 4949.8212, 4923.6190, 4922.2759, 4898.8770, 4881.7460, 4876.6374, 4868.1204, 4852.9385] +25-08-31 10:31:29 | D | - best error = [ 5043.6816, 4986.6460, 4949.8212, 4923.6190, 4922.2759, 4898.8770, 4881.7460, 4876.6374, 4868.1204, 4852.9385] +25-08-31 10:31:39 | D | - iter = [ 10, 11, 12, 13, 14] +25-08-31 10:31:39 | D | - error = [ 4846.0322, 4840.0167, 4834.1022, 4829.6012, 4857.2999] +25-08-31 10:31:39 | D | - best error = [ 4846.0322, 4840.0167, 4834.1022, 4829.6012, 4829.6012] +25-08-31 10:31:39 | D | + Adding low-rank branches to transformer_blocks.15.ff.net.0.proj +25-08-31 10:31:40 | D | - Calibrating low-rank branch for transformer_blocks.15.ff.net.2 +25-08-31 10:31:40 | D | + w: sfp4_e2m1_all +25-08-31 10:31:40 | D | + x: sfp4_e2m1_all +25-08-31 10:31:40 | D | + y: None +25-08-31 10:31:40 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:31:40 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:31:40 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:31:44 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:32:05 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-31 10:32:05 | D | - error = [ 5256.6213, 5224.5792, 5203.8734, 5185.0929, 5170.2810, 5160.5556, 5160.9633] +25-08-31 10:32:05 | D | - best error = [ 5256.6213, 5224.5792, 5203.8734, 5185.0929, 5170.2810, 5160.5556, 5160.5556] +25-08-31 10:32:05 | D | + Adding low-rank branches to transformer_blocks.15.ff.net.2 +25-08-31 10:32:05 | D | - Calibrating low-rank branch for transformer_blocks.15.ff_context.net.0.proj +25-08-31 10:32:05 | D | + w: sfp4_e2m1_all +25-08-31 10:32:05 | D | + x: sfp4_e2m1_all +25-08-31 10:32:05 | D | + y: None +25-08-31 10:32:05 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:32:05 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:32:05 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:32:06 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:32:11 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-31 10:32:11 | D | - error = [ 1154.5530, 1126.1864, 1109.1675, 1097.7129, 1098.6989] +25-08-31 10:32:11 | D | - best error = [ 1154.5530, 1126.1864, 1109.1675, 1097.7129, 1097.7129] +25-08-31 10:32:11 | D | + Adding low-rank branches to transformer_blocks.15.ff_context.net.0.proj +25-08-31 10:32:11 | D | - Calibrating low-rank branch for transformer_blocks.15.ff_context.net.2 +25-08-31 10:32:11 | D | + w: sfp4_e2m1_all +25-08-31 10:32:12 | D | + x: sfp4_e2m1_all +25-08-31 10:32:12 | D | + y: None +25-08-31 10:32:12 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:32:12 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:32:12 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:32:12 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:32:20 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-31 10:32:20 | D | - error = [ 406.4772, 402.8112, 400.1525, 398.4567, 396.9200, 396.9616] +25-08-31 10:32:20 | D | - best error = [ 406.4772, 402.8112, 400.1525, 398.4567, 396.9200, 396.9200] +25-08-31 10:32:20 | D | + Adding low-rank branches to transformer_blocks.15.ff_context.net.2 +25-08-31 10:32:38 | D | - Calibrating low-rank branches of block transformer_blocks.16 +25-08-31 10:32:38 | D | - Calibrating low-rank branch for transformer_blocks.16.attn.to_q, transformer_blocks.16.attn.to_k, transformer_blocks.16.attn.to_v +25-08-31 10:32:38 | D | + w: sfp4_e2m1_all +25-08-31 10:32:38 | D | + x: sfp4_e2m1_all +25-08-31 10:32:38 | D | + y: None +25-08-31 10:32:38 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:32:38 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:32:38 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:32:39 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:32:50 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-31 10:32:50 | D | - error = [ 2784.7066, 2750.1102, 2722.6045, 2693.4428, 2669.3730, 2672.0010] +25-08-31 10:32:50 | D | - best error = [ 2784.7066, 2750.1102, 2722.6045, 2693.4428, 2669.3730, 2669.3730] +25-08-31 10:32:50 | D | + Adding low-rank branches to transformer_blocks.16.attn.to_q, transformer_blocks.16.attn.to_k, transformer_blocks.16.attn.to_v +25-08-31 10:32:50 | D | - Calibrating low-rank branch for transformer_blocks.16.attn.add_q_proj, transformer_blocks.16.attn.add_k_proj, transformer_blocks.16.attn.add_v_proj +25-08-31 10:32:50 | D | + w: sfp4_e2m1_all +25-08-31 10:32:50 | D | + x: sfp4_e2m1_all +25-08-31 10:32:50 | D | + y: None +25-08-31 10:32:50 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:32:50 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:32:50 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:32:51 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:33:06 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 10:33:06 | D | - error = [ 1076.1220, 1032.9198, 1017.2751, 1010.5859, 1004.5379, 992.8088, 988.8130, 988.6859, 989.1667] +25-08-31 10:33:06 | D | - best error = [ 1076.1220, 1032.9198, 1017.2751, 1010.5859, 1004.5379, 992.8088, 988.8130, 988.6859, 988.6859] +25-08-31 10:33:06 | D | + Adding low-rank branches to transformer_blocks.16.attn.add_q_proj, transformer_blocks.16.attn.add_k_proj, transformer_blocks.16.attn.add_v_proj +25-08-31 10:33:06 | D | - Calibrating low-rank branch for transformer_blocks.16.attn.to_out.0 +25-08-31 10:33:06 | D | + w: sfp4_e2m1_all +25-08-31 10:33:06 | D | + x: sfp4_e2m1_all +25-08-31 10:33:06 | D | + y: None +25-08-31 10:33:06 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:33:06 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:33:06 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:33:08 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:33:22 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:33:22 | D | - error = [10486.7917, 10382.0071, 10313.2656, 10269.2302, 10233.2931, 10200.0471, 10179.6684, 10155.5372, 10145.5843, 10134.9834] +25-08-31 10:33:22 | D | - best error = [10486.7917, 10382.0071, 10313.2656, 10269.2302, 10233.2931, 10200.0471, 10179.6684, 10155.5372, 10145.5843, 10134.9834] +25-08-31 10:33:36 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 10:33:36 | D | - error = [10121.0039, 10108.2319, 10097.2471, 10090.8695, 10083.3018, 10074.9849, 10060.6871, 10053.6517, 10048.5677, 10063.0814] +25-08-31 10:33:36 | D | - best error = [10121.0039, 10108.2319, 10097.2471, 10090.8695, 10083.3018, 10074.9849, 10060.6871, 10053.6517, 10048.5677, 10048.5677] +25-08-31 10:33:36 | D | + Adding low-rank branches to transformer_blocks.16.attn.to_out.0 +25-08-31 10:33:37 | D | - Calibrating low-rank branch for transformer_blocks.16.attn.to_add_out +25-08-31 10:33:37 | D | + w: sfp4_e2m1_all +25-08-31 10:33:37 | D | + x: sfp4_e2m1_all +25-08-31 10:33:37 | D | + y: None +25-08-31 10:33:37 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:33:37 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:33:37 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:33:37 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:33:46 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 10:33:46 | D | - error = [ 1849.9343, 1823.6877, 1812.0335, 1799.2318, 1789.6106, 1779.9605, 1776.2483, 1769.1675, 1769.9620] +25-08-31 10:33:46 | D | - best error = [ 1849.9343, 1823.6877, 1812.0335, 1799.2318, 1789.6106, 1779.9605, 1776.2483, 1769.1675, 1769.1675] +25-08-31 10:33:46 | D | + Adding low-rank branches to transformer_blocks.16.attn.to_add_out +25-08-31 10:33:47 | D | - Calibrating low-rank branch for transformer_blocks.16.ff.net.0.proj +25-08-31 10:33:47 | D | + w: sfp4_e2m1_all +25-08-31 10:33:47 | D | + x: sfp4_e2m1_all +25-08-31 10:33:47 | D | + y: None +25-08-31 10:33:47 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:33:47 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:33:47 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:33:48 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:34:08 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:34:08 | D | - error = [ 5108.4698, 5046.3391, 5005.4053, 4977.6388, 4955.3862, 4938.6919, 4927.5404, 4916.0814, 4907.0869, 4898.4124] +25-08-31 10:34:08 | D | - best error = [ 5108.4698, 5046.3391, 5005.4053, 4977.6388, 4955.3862, 4938.6919, 4927.5404, 4916.0814, 4907.0869, 4898.4124] +25-08-31 10:34:21 | D | - iter = [ 10, 11, 12, 13, 14, 15] +25-08-31 10:34:21 | D | - error = [ 4891.1541, 4884.8152, 4879.5451, 4875.5065, 4870.8964, 4894.3447] +25-08-31 10:34:21 | D | - best error = [ 4891.1541, 4884.8152, 4879.5451, 4875.5065, 4870.8964, 4870.8964] +25-08-31 10:34:21 | D | + Adding low-rank branches to transformer_blocks.16.ff.net.0.proj +25-08-31 10:34:21 | D | - Calibrating low-rank branch for transformer_blocks.16.ff.net.2 +25-08-31 10:34:21 | D | + w: sfp4_e2m1_all +25-08-31 10:34:21 | D | + x: sfp4_e2m1_all +25-08-31 10:34:21 | D | + y: None +25-08-31 10:34:21 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:34:21 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:34:21 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:34:25 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:34:55 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:34:55 | D | - error = [ 5047.6541, 5020.0460, 4997.8543, 4981.8816, 4968.1262, 4959.6681, 4950.5775, 4941.7596, 4935.9848, 4931.6890] +25-08-31 10:34:55 | D | - best error = [ 5047.6541, 5020.0460, 4997.8543, 4981.8816, 4968.1262, 4959.6681, 4950.5775, 4941.7596, 4935.9848, 4931.6890] +25-08-31 10:35:25 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 10:35:25 | D | - error = [ 4927.7476, 4922.3511, 4919.1890, 4917.3639, 4915.2231, 4909.8955, 4908.5635, 4906.1690, 4904.2967, 4900.3190] +25-08-31 10:35:25 | D | - best error = [ 4927.7476, 4922.3511, 4919.1890, 4917.3639, 4915.2231, 4909.8955, 4908.5635, 4906.1690, 4904.2967, 4900.3190] +25-08-31 10:35:55 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 10:35:55 | D | - error = [ 4897.7644, 4895.3067, 4893.5272, 4891.7997, 4891.4795, 4889.9423, 4889.6593, 4887.7635, 4887.5558, 4884.6179] +25-08-31 10:35:55 | D | - best error = [ 4897.7644, 4895.3067, 4893.5272, 4891.7997, 4891.4795, 4889.9423, 4889.6593, 4887.7635, 4887.5558, 4884.6179] +25-08-31 10:36:13 | D | - iter = [ 30, 31, 32, 33, 34, 35] +25-08-31 10:36:13 | D | - error = [ 4883.3091, 4881.9031, 4881.3691, 4880.1348, 4878.1043, 4879.3454] +25-08-31 10:36:13 | D | - best error = [ 4883.3091, 4881.9031, 4881.3691, 4880.1348, 4878.1043, 4878.1043] +25-08-31 10:36:13 | D | + Adding low-rank branches to transformer_blocks.16.ff.net.2 +25-08-31 10:36:14 | D | - Calibrating low-rank branch for transformer_blocks.16.ff_context.net.0.proj +25-08-31 10:36:14 | D | + w: sfp4_e2m1_all +25-08-31 10:36:14 | D | + x: sfp4_e2m1_all +25-08-31 10:36:14 | D | + y: None +25-08-31 10:36:14 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:36:14 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:36:14 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:36:14 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:36:25 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:36:25 | D | - error = [ 1317.9950, 1285.5848, 1264.1642, 1247.1627, 1236.2119, 1227.3225, 1218.3818, 1214.1860, 1206.8356, 1203.2147] +25-08-31 10:36:25 | D | - best error = [ 1317.9950, 1285.5848, 1264.1642, 1247.1627, 1236.2119, 1227.3225, 1218.3818, 1214.1860, 1206.8356, 1203.2147] +25-08-31 10:36:37 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 10:36:37 | D | - error = [ 1198.5668, 1196.8595, 1193.9481, 1191.0288, 1189.1261, 1186.4245, 1185.8366, 1183.6400, 1182.6590, 1181.2911] +25-08-31 10:36:37 | D | - best error = [ 1198.5668, 1196.8595, 1193.9481, 1191.0288, 1189.1261, 1186.4245, 1185.8366, 1183.6400, 1182.6590, 1181.2911] +25-08-31 10:36:47 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28] +25-08-31 10:36:47 | D | - error = [ 1180.6944, 1180.1579, 1178.0635, 1177.6686, 1177.1180, 1176.8100, 1174.9803, 1173.8458, 1174.7129] +25-08-31 10:36:47 | D | - best error = [ 1180.6944, 1180.1579, 1178.0635, 1177.6686, 1177.1180, 1176.8100, 1174.9803, 1173.8458, 1173.8458] +25-08-31 10:36:47 | D | + Adding low-rank branches to transformer_blocks.16.ff_context.net.0.proj +25-08-31 10:36:47 | D | - Calibrating low-rank branch for transformer_blocks.16.ff_context.net.2 +25-08-31 10:36:47 | D | + w: sfp4_e2m1_all +25-08-31 10:36:47 | D | + x: sfp4_e2m1_all +25-08-31 10:36:47 | D | + y: None +25-08-31 10:36:47 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:36:47 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:36:47 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:36:48 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:37:01 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:37:01 | D | - error = [ 322.5093, 319.4500, 317.8890, 316.8427, 316.1733, 315.6094, 314.8468, 314.3506, 314.2303, 313.7630] +25-08-31 10:37:01 | D | - best error = [ 322.5093, 319.4500, 317.8890, 316.8427, 316.1733, 315.6094, 314.8468, 314.3506, 314.2303, 313.7630] +25-08-31 10:37:13 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 10:37:13 | D | - error = [ 313.2646, 313.0118, 312.8520, 312.8424, 312.6553, 312.6071, 312.2722, 312.1021, 311.8226, 311.6997] +25-08-31 10:37:13 | D | - best error = [ 313.2646, 313.0118, 312.8520, 312.8424, 312.6553, 312.6071, 312.2722, 312.1021, 311.8226, 311.6997] +25-08-31 10:37:19 | D | - iter = [ 20, 21, 22, 23, 24] +25-08-31 10:37:19 | D | - error = [ 311.5871, 311.2322, 310.9289, 310.7016, 310.7654] +25-08-31 10:37:19 | D | - best error = [ 311.5871, 311.2322, 310.9289, 310.7016, 310.7016] +25-08-31 10:37:20 | D | + Adding low-rank branches to transformer_blocks.16.ff_context.net.2 +25-08-31 10:37:38 | D | - Calibrating low-rank branches of block transformer_blocks.17 +25-08-31 10:37:38 | D | - Calibrating low-rank branch for transformer_blocks.17.attn.to_q, transformer_blocks.17.attn.to_k, transformer_blocks.17.attn.to_v +25-08-31 10:37:38 | D | + w: sfp4_e2m1_all +25-08-31 10:37:38 | D | + x: sfp4_e2m1_all +25-08-31 10:37:38 | D | + y: None +25-08-31 10:37:38 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:37:38 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:37:38 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:37:39 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:37:56 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 10:37:56 | D | - error = [ 3391.2891, 3299.5992, 3236.7471, 3232.1397, 3200.2978, 3175.3732, 3171.0637, 3145.1127, 3146.4494] +25-08-31 10:37:56 | D | - best error = [ 3391.2891, 3299.5992, 3236.7471, 3232.1397, 3200.2978, 3175.3732, 3171.0637, 3145.1127, 3145.1127] +25-08-31 10:37:56 | D | + Adding low-rank branches to transformer_blocks.17.attn.to_q, transformer_blocks.17.attn.to_k, transformer_blocks.17.attn.to_v +25-08-31 10:37:56 | D | - Calibrating low-rank branch for transformer_blocks.17.attn.add_q_proj, transformer_blocks.17.attn.add_k_proj, transformer_blocks.17.attn.add_v_proj +25-08-31 10:37:56 | D | + w: sfp4_e2m1_all +25-08-31 10:37:56 | D | + x: sfp4_e2m1_all +25-08-31 10:37:56 | D | + y: None +25-08-31 10:37:56 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:37:56 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:37:56 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:37:57 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:38:02 | D | - iter = [ 0, 1, 2] +25-08-31 10:38:02 | D | - error = [ 1156.8510, 1129.8423, 1129.9941] +25-08-31 10:38:02 | D | - best error = [ 1156.8510, 1129.8423, 1129.8423] +25-08-31 10:38:02 | D | + Adding low-rank branches to transformer_blocks.17.attn.add_q_proj, transformer_blocks.17.attn.add_k_proj, transformer_blocks.17.attn.add_v_proj +25-08-31 10:38:02 | D | - Calibrating low-rank branch for transformer_blocks.17.attn.to_out.0 +25-08-31 10:38:02 | D | + w: sfp4_e2m1_all +25-08-31 10:38:02 | D | + x: sfp4_e2m1_all +25-08-31 10:38:02 | D | + y: None +25-08-31 10:38:02 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:38:02 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:38:02 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:38:04 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:38:18 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:38:18 | D | - error = [15825.3933, 15698.4753, 15626.3342, 15545.4158, 15481.5601, 15435.0593, 15385.7404, 15361.7009, 15338.9439, 15314.9799] +25-08-31 10:38:18 | D | - best error = [15825.3933, 15698.4753, 15626.3342, 15545.4158, 15481.5601, 15435.0593, 15385.7404, 15361.7009, 15338.9439, 15314.9799] +25-08-31 10:38:33 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 10:38:33 | D | - error = [15294.8980, 15279.2105, 15268.8525, 15254.2002, 15239.5895, 15233.3755, 15221.8249, 15215.5899, 15195.8247, 15190.1737] +25-08-31 10:38:33 | D | - best error = [15294.8980, 15279.2105, 15268.8525, 15254.2002, 15239.5895, 15233.3755, 15221.8249, 15215.5899, 15195.8247, 15190.1737] +25-08-31 10:38:44 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27] +25-08-31 10:38:44 | D | - error = [15177.0068, 15174.2266, 15166.4741, 15160.4992, 15155.4500, 15149.4787, 15145.2218, 15145.8610] +25-08-31 10:38:44 | D | - best error = [15177.0068, 15174.2266, 15166.4741, 15160.4992, 15155.4500, 15149.4787, 15145.2218, 15145.2218] +25-08-31 10:38:45 | D | + Adding low-rank branches to transformer_blocks.17.attn.to_out.0 +25-08-31 10:38:45 | D | - Calibrating low-rank branch for transformer_blocks.17.attn.to_add_out +25-08-31 10:38:45 | D | + w: sfp4_e2m1_all +25-08-31 10:38:45 | D | + x: sfp4_e2m1_all +25-08-31 10:38:45 | D | + y: None +25-08-31 10:38:45 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:38:45 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:38:45 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:38:45 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:38:55 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:38:55 | D | - error = [ 2774.0451, 2741.2271, 2722.2249, 2701.9070, 2693.8253, 2684.5474, 2682.9022, 2673.9570, 2668.8149, 2665.0240] +25-08-31 10:38:55 | D | - best error = [ 2774.0451, 2741.2271, 2722.2249, 2701.9070, 2693.8253, 2684.5474, 2682.9022, 2673.9570, 2668.8149, 2665.0240] +25-08-31 10:39:00 | D | - iter = [ 10, 11, 12, 13] +25-08-31 10:39:00 | D | - error = [ 2658.6677, 2654.2554, 2648.1038, 2652.6434] +25-08-31 10:39:00 | D | - best error = [ 2658.6677, 2654.2554, 2648.1038, 2648.1038] +25-08-31 10:39:00 | D | + Adding low-rank branches to transformer_blocks.17.attn.to_add_out +25-08-31 10:39:00 | D | - Calibrating low-rank branch for transformer_blocks.17.ff.net.0.proj +25-08-31 10:39:00 | D | + w: sfp4_e2m1_all +25-08-31 10:39:00 | D | + x: sfp4_e2m1_all +25-08-31 10:39:00 | D | + y: None +25-08-31 10:39:00 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:39:00 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:39:00 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:39:02 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:39:12 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-31 10:39:12 | D | - error = [ 5331.2314, 5265.0028, 5221.5682, 5190.4324, 5191.0014] +25-08-31 10:39:12 | D | - best error = [ 5331.2314, 5265.0028, 5221.5682, 5190.4324, 5190.4324] +25-08-31 10:39:12 | D | + Adding low-rank branches to transformer_blocks.17.ff.net.0.proj +25-08-31 10:39:12 | D | - Calibrating low-rank branch for transformer_blocks.17.ff.net.2 +25-08-31 10:39:12 | D | + w: sfp4_e2m1_all +25-08-31 10:39:12 | D | + x: sfp4_e2m1_all +25-08-31 10:39:12 | D | + y: None +25-08-31 10:39:12 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:39:12 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:39:12 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:39:17 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:39:47 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:39:47 | D | - error = [ 5035.7421, 5006.7494, 4987.1902, 4965.5519, 4964.9896, 4949.5733, 4944.4967, 4931.3923, 4921.4701, 4915.5167] +25-08-31 10:39:47 | D | - best error = [ 5035.7421, 5006.7494, 4987.1902, 4965.5519, 4964.9896, 4949.5733, 4944.4967, 4931.3923, 4921.4701, 4915.5167] +25-08-31 10:39:55 | D | - iter = [ 10, 11, 12] +25-08-31 10:39:55 | D | - error = [ 4910.5786, 4904.8019, 4908.7245] +25-08-31 10:39:55 | D | - best error = [ 4910.5786, 4904.8019, 4904.8019] +25-08-31 10:39:56 | D | + Adding low-rank branches to transformer_blocks.17.ff.net.2 +25-08-31 10:39:56 | D | - Calibrating low-rank branch for transformer_blocks.17.ff_context.net.0.proj +25-08-31 10:39:56 | D | + w: sfp4_e2m1_all +25-08-31 10:39:56 | D | + x: sfp4_e2m1_all +25-08-31 10:39:56 | D | + y: None +25-08-31 10:39:56 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:39:56 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:39:56 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:39:56 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:40:02 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-31 10:40:02 | D | - error = [ 1316.8287, 1287.2488, 1266.6398, 1249.6750, 1254.6064] +25-08-31 10:40:02 | D | - best error = [ 1316.8287, 1287.2488, 1266.6398, 1249.6750, 1249.6750] +25-08-31 10:40:02 | D | + Adding low-rank branches to transformer_blocks.17.ff_context.net.0.proj +25-08-31 10:40:02 | D | - Calibrating low-rank branch for transformer_blocks.17.ff_context.net.2 +25-08-31 10:40:02 | D | + w: sfp4_e2m1_all +25-08-31 10:40:02 | D | + x: sfp4_e2m1_all +25-08-31 10:40:02 | D | + y: None +25-08-31 10:40:02 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:40:02 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:40:02 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:40:03 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:40:15 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:40:15 | D | - error = [ 500.0851, 494.1256, 489.7347, 486.8364, 484.2620, 482.3200, 481.1544, 479.5301, 478.4758, 477.7910] +25-08-31 10:40:15 | D | - best error = [ 500.0851, 494.1256, 489.7347, 486.8364, 484.2620, 482.3200, 481.1544, 479.5301, 478.4758, 477.7910] +25-08-31 10:40:18 | D | - iter = [ 10, 11] +25-08-31 10:40:18 | D | - error = [ 477.3035, 477.5278] +25-08-31 10:40:18 | D | - best error = [ 477.3035, 477.3035] +25-08-31 10:40:18 | D | + Adding low-rank branches to transformer_blocks.17.ff_context.net.2 +25-08-31 10:40:36 | D | - Calibrating low-rank branches of block transformer_blocks.18 +25-08-31 10:40:36 | D | - Calibrating low-rank branch for transformer_blocks.18.attn.to_q, transformer_blocks.18.attn.to_k, transformer_blocks.18.attn.to_v +25-08-31 10:40:36 | D | + w: sfp4_e2m1_all +25-08-31 10:40:36 | D | + x: sfp4_e2m1_all +25-08-31 10:40:36 | D | + y: None +25-08-31 10:40:36 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:40:36 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:40:36 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:40:37 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:40:45 | D | - iter = [ 0, 1, 2, 3] +25-08-31 10:40:45 | D | - error = [ 3780.3167, 3733.9020, 3681.7520, 3682.9086] +25-08-31 10:40:45 | D | - best error = [ 3780.3167, 3733.9020, 3681.7520, 3681.7520] +25-08-31 10:40:45 | D | + Adding low-rank branches to transformer_blocks.18.attn.to_q, transformer_blocks.18.attn.to_k, transformer_blocks.18.attn.to_v +25-08-31 10:40:45 | D | - Calibrating low-rank branch for transformer_blocks.18.attn.add_q_proj, transformer_blocks.18.attn.add_k_proj, transformer_blocks.18.attn.add_v_proj +25-08-31 10:40:45 | D | + w: sfp4_e2m1_all +25-08-31 10:40:45 | D | + x: sfp4_e2m1_all +25-08-31 10:40:45 | D | + y: None +25-08-31 10:40:45 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:40:45 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:40:45 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:40:46 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:40:58 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-31 10:40:58 | D | - error = [ 1603.5394, 1569.7796, 1537.8580, 1517.4122, 1495.2218, 1491.4899, 1521.8806] +25-08-31 10:40:58 | D | - best error = [ 1603.5394, 1569.7796, 1537.8580, 1517.4122, 1495.2218, 1491.4899, 1491.4899] +25-08-31 10:40:58 | D | + Adding low-rank branches to transformer_blocks.18.attn.add_q_proj, transformer_blocks.18.attn.add_k_proj, transformer_blocks.18.attn.add_v_proj +25-08-31 10:40:58 | D | - Calibrating low-rank branch for transformer_blocks.18.attn.to_out.0 +25-08-31 10:40:58 | D | + w: sfp4_e2m1_all +25-08-31 10:40:58 | D | + x: sfp4_e2m1_all +25-08-31 10:40:58 | D | + y: None +25-08-31 10:40:58 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:40:58 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:40:58 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:41:00 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:41:11 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 10:41:11 | D | - error = [ 4817.3408, 4780.5531, 4743.4473, 4726.3601, 4713.5585, 4696.4276, 4691.2512, 4701.1794] +25-08-31 10:41:11 | D | - best error = [ 4817.3408, 4780.5531, 4743.4473, 4726.3601, 4713.5585, 4696.4276, 4691.2512, 4691.2512] +25-08-31 10:41:11 | D | + Adding low-rank branches to transformer_blocks.18.attn.to_out.0 +25-08-31 10:41:11 | D | - Calibrating low-rank branch for transformer_blocks.18.attn.to_add_out +25-08-31 10:41:11 | D | + w: sfp4_e2m1_all +25-08-31 10:41:11 | D | + x: sfp4_e2m1_all +25-08-31 10:41:11 | D | + y: None +25-08-31 10:41:11 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:41:11 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:41:11 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:41:12 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:41:22 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:41:22 | D | - error = [ 1076.2967, 1062.0839, 1053.2426, 1046.9790, 1041.3975, 1036.5056, 1032.0097, 1028.3004, 1027.0088, 1024.8447] +25-08-31 10:41:22 | D | - best error = [ 1076.2967, 1062.0839, 1053.2426, 1046.9790, 1041.3975, 1036.5056, 1032.0097, 1028.3004, 1027.0088, 1024.8447] +25-08-31 10:41:24 | D | - iter = [ 10, 11] +25-08-31 10:41:24 | D | - error = [ 1024.2603, 1027.2223] +25-08-31 10:41:24 | D | - best error = [ 1024.2603, 1024.2603] +25-08-31 10:41:24 | D | + Adding low-rank branches to transformer_blocks.18.attn.to_add_out +25-08-31 10:41:24 | D | - Calibrating low-rank branch for transformer_blocks.18.ff.net.0.proj +25-08-31 10:41:24 | D | + w: sfp4_e2m1_all +25-08-31 10:41:24 | D | + x: sfp4_e2m1_all +25-08-31 10:41:24 | D | + y: None +25-08-31 10:41:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:41:24 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:41:24 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:41:26 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:41:46 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:41:46 | D | - error = [ 5101.7079, 5044.2548, 5005.3927, 5002.2571, 4964.0929, 4941.3166, 4924.8436, 4911.4837, 4902.0453, 4894.7999] +25-08-31 10:41:46 | D | - best error = [ 5101.7079, 5044.2548, 5005.3927, 5002.2571, 4964.0929, 4941.3166, 4924.8436, 4911.4837, 4902.0453, 4894.7999] +25-08-31 10:42:07 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 10:42:07 | D | - error = [ 4888.1289, 4882.5669, 4876.7506, 4872.3364, 4868.9539, 4865.7287, 4862.3990, 4858.9104, 4856.6882, 4852.6609] +25-08-31 10:42:07 | D | - best error = [ 4888.1289, 4882.5669, 4876.7506, 4872.3364, 4868.9539, 4865.7287, 4862.3990, 4858.9104, 4856.6882, 4852.6609] +25-08-31 10:42:14 | D | - iter = [ 20, 21, 22, 23] +25-08-31 10:42:14 | D | - error = [ 4849.8687, 4847.4356, 4845.1469, 4856.5951] +25-08-31 10:42:14 | D | - best error = [ 4849.8687, 4847.4356, 4845.1469, 4845.1469] +25-08-31 10:42:14 | D | + Adding low-rank branches to transformer_blocks.18.ff.net.0.proj +25-08-31 10:42:15 | D | - Calibrating low-rank branch for transformer_blocks.18.ff.net.2 +25-08-31 10:42:15 | D | + w: sfp4_e2m1_all +25-08-31 10:42:15 | D | + x: sfp4_e2m1_all +25-08-31 10:42:15 | D | + y: None +25-08-31 10:42:15 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:42:15 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:42:15 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:42:19 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:42:37 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-31 10:42:37 | D | - error = [ 3907.3042, 3883.4092, 3870.6492, 3863.0799, 3850.7698, 3854.5980] +25-08-31 10:42:37 | D | - best error = [ 3907.3042, 3883.4092, 3870.6492, 3863.0799, 3850.7698, 3850.7698] +25-08-31 10:42:37 | D | + Adding low-rank branches to transformer_blocks.18.ff.net.2 +25-08-31 10:42:37 | D | - Calibrating low-rank branch for transformer_blocks.18.ff_context.net.0.proj +25-08-31 10:42:37 | D | + w: sfp4_e2m1_all +25-08-31 10:42:37 | D | + x: sfp4_e2m1_all +25-08-31 10:42:37 | D | + y: None +25-08-31 10:42:37 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:42:37 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:42:37 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:42:38 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:42:42 | D | - iter = [ 0, 1, 2, 3] +25-08-31 10:42:42 | D | - error = [ 1774.6852, 1754.9131, 1741.4632, 1743.3315] +25-08-31 10:42:42 | D | - best error = [ 1774.6852, 1754.9131, 1741.4632, 1741.4632] +25-08-31 10:42:43 | D | + Adding low-rank branches to transformer_blocks.18.ff_context.net.0.proj +25-08-31 10:42:43 | D | - Calibrating low-rank branch for transformer_blocks.18.ff_context.net.2 +25-08-31 10:42:43 | D | + w: sfp4_e2m1_all +25-08-31 10:42:43 | D | + x: sfp4_e2m1_all +25-08-31 10:42:43 | D | + y: None +25-08-31 10:42:43 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:42:43 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:42:43 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:42:44 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:42:56 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:42:56 | D | - error = [ 627.2824, 619.5901, 614.9125, 611.5267, 610.5851, 609.3120, 607.9458, 607.2460, 605.5819, 604.4360] +25-08-31 10:42:56 | D | - best error = [ 627.2824, 619.5901, 614.9125, 611.5267, 610.5851, 609.3120, 607.9458, 607.2460, 605.5819, 604.4360] +25-08-31 10:43:06 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17] +25-08-31 10:43:06 | D | - error = [ 603.5059, 603.4171, 603.0808, 602.2185, 602.0616, 601.4150, 600.8097, 601.2600] +25-08-31 10:43:06 | D | - best error = [ 603.5059, 603.4171, 603.0808, 602.2185, 602.0616, 601.4150, 600.8097, 600.8097] +25-08-31 10:43:06 | D | + Adding low-rank branches to transformer_blocks.18.ff_context.net.2 +25-08-31 10:43:24 | D | - Calibrating low-rank branches of block single_transformer_blocks.0 +25-08-31 10:43:24 | D | - Calibrating low-rank branch for single_transformer_blocks.0.attn.to_q, single_transformer_blocks.0.attn.to_k, single_transformer_blocks.0.attn.to_v +25-08-31 10:43:24 | D | + w: sfp4_e2m1_all +25-08-31 10:43:24 | D | + x: sfp4_e2m1_all +25-08-31 10:43:24 | D | + y: None +25-08-31 10:43:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:43:24 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:43:24 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:43:25 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:43:36 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-31 10:43:36 | D | - error = [ 7535.2725, 7407.3202, 7326.9153, 7191.7208, 7267.4331] +25-08-31 10:43:36 | D | - best error = [ 7535.2725, 7407.3202, 7326.9153, 7191.7208, 7191.7208] +25-08-31 10:43:36 | D | + Adding low-rank branches to single_transformer_blocks.0.attn.to_q, single_transformer_blocks.0.attn.to_k, single_transformer_blocks.0.attn.to_v +25-08-31 10:43:36 | D | - Calibrating low-rank branch for single_transformer_blocks.0.proj_out.linears.0 +25-08-31 10:43:36 | D | + w: sfp4_e2m1_all +25-08-31 10:43:36 | D | + x: sfp4_e2m1_all +25-08-31 10:43:36 | D | + y: None +25-08-31 10:43:36 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:43:36 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:43:36 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:43:37 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:43:48 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-31 10:43:48 | D | - error = [ 6064.6365, 6025.8828, 5977.4413, 5944.7113, 5920.5223, 5902.7783, 5906.1419] +25-08-31 10:43:48 | D | - best error = [ 6064.6365, 6025.8828, 5977.4413, 5944.7113, 5920.5223, 5902.7783, 5902.7783] +25-08-31 10:43:48 | D | + Adding low-rank branches to single_transformer_blocks.0.proj_out.linears.0 +25-08-31 10:43:48 | D | - Calibrating low-rank branch for single_transformer_blocks.0.proj_mlp +25-08-31 10:43:48 | D | + w: sfp4_e2m1_all +25-08-31 10:43:48 | D | + x: sfp4_e2m1_all +25-08-31 10:43:48 | D | + y: None +25-08-31 10:43:48 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:43:48 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:43:48 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:43:49 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:44:10 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:44:10 | D | - error = [ 5238.2659, 5193.4815, 5164.4279, 5142.3117, 5136.5467, 5116.5840, 5104.4808, 5093.0455, 5086.4841, 5079.2775] +25-08-31 10:44:10 | D | - best error = [ 5238.2659, 5193.4815, 5164.4279, 5142.3117, 5136.5467, 5116.5840, 5104.4808, 5093.0455, 5086.4841, 5079.2775] +25-08-31 10:44:18 | D | - iter = [ 10, 11, 12, 13] +25-08-31 10:44:18 | D | - error = [ 5073.4974, 5069.8493, 5066.0766, 5070.4737] +25-08-31 10:44:18 | D | - best error = [ 5073.4974, 5069.8493, 5066.0766, 5066.0766] +25-08-31 10:44:18 | D | + Adding low-rank branches to single_transformer_blocks.0.proj_mlp +25-08-31 10:44:19 | D | - Calibrating low-rank branch for single_transformer_blocks.0.proj_out.linears.1 +25-08-31 10:44:19 | D | + w: sfp4_e2m1_all +25-08-31 10:44:19 | D | + x: sfp4_e2m1_all +25-08-31 10:44:19 | D | + y: None +25-08-31 10:44:19 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:44:19 | D | + finished parsing calibration arguments, ram usage: 14.8 +25-08-31 10:44:19 | D | + finished resetting calibrator, ram usage: 14.8 +25-08-31 10:44:23 | D | + finished calculating the original outputs, ram usage: 14.8 +25-08-31 10:44:54 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:44:54 | D | - error = [ 5459.3707, 5425.0253, 5401.3528, 5383.7621, 5371.3283, 5358.7941, 5350.8848, 5339.4865, 5331.0976, 5325.3828] +25-08-31 10:44:54 | D | - best error = [ 5459.3707, 5425.0253, 5401.3528, 5383.7621, 5371.3283, 5358.7941, 5350.8848, 5339.4865, 5331.0976, 5325.3828] +25-08-31 10:45:26 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 10:45:26 | D | - error = [ 5320.5581, 5316.5837, 5311.0860, 5307.5271, 5304.0671, 5300.4798, 5298.5826, 5294.1925, 5292.2549, 5289.9991] +25-08-31 10:45:26 | D | - best error = [ 5320.5581, 5316.5837, 5311.0860, 5307.5271, 5304.0671, 5300.4798, 5298.5826, 5294.1925, 5292.2549, 5289.9991] +25-08-31 10:45:42 | D | - iter = [ 20, 21, 22, 23, 24] +25-08-31 10:45:42 | D | - error = [ 5287.9862, 5284.3111, 5283.8399, 5280.5191, 5280.5443] +25-08-31 10:45:42 | D | - best error = [ 5287.9862, 5284.3111, 5283.8399, 5280.5191, 5280.5191] +25-08-31 10:45:42 | D | + Adding low-rank branches to single_transformer_blocks.0.proj_out.linears.1 +25-08-31 10:46:00 | D | - Calibrating low-rank branches of block single_transformer_blocks.1 +25-08-31 10:46:00 | D | - Calibrating low-rank branch for single_transformer_blocks.1.attn.to_q, single_transformer_blocks.1.attn.to_k, single_transformer_blocks.1.attn.to_v +25-08-31 10:46:00 | D | + w: sfp4_e2m1_all +25-08-31 10:46:00 | D | + x: sfp4_e2m1_all +25-08-31 10:46:00 | D | + y: None +25-08-31 10:46:00 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:46:00 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:46:00 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:46:01 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:46:20 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 10:46:20 | D | - error = [ 7471.1158, 7306.9498, 7254.3355, 7163.4053, 7117.3588, 7059.9652, 7051.7725, 7019.8826, 7080.9513] +25-08-31 10:46:20 | D | - best error = [ 7471.1158, 7306.9498, 7254.3355, 7163.4053, 7117.3588, 7059.9652, 7051.7725, 7019.8826, 7019.8826] +25-08-31 10:46:21 | D | + Adding low-rank branches to single_transformer_blocks.1.attn.to_q, single_transformer_blocks.1.attn.to_k, single_transformer_blocks.1.attn.to_v +25-08-31 10:46:21 | D | - Calibrating low-rank branch for single_transformer_blocks.1.proj_out.linears.0 +25-08-31 10:46:21 | D | + w: sfp4_e2m1_all +25-08-31 10:46:21 | D | + x: sfp4_e2m1_all +25-08-31 10:46:21 | D | + y: None +25-08-31 10:46:21 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:46:21 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:46:21 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:46:22 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:46:37 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:46:37 | D | - error = [ 4781.9270, 4740.8170, 4710.9103, 4689.0856, 4683.8721, 4665.6798, 4654.5694, 4635.5856, 4626.6515, 4619.9777] +25-08-31 10:46:37 | D | - best error = [ 4781.9270, 4740.8170, 4710.9103, 4689.0856, 4683.8721, 4665.6798, 4654.5694, 4635.5856, 4626.6515, 4619.9777] +25-08-31 10:46:40 | D | - iter = [ 10, 11] +25-08-31 10:46:40 | D | - error = [ 4614.4321, 4626.7379] +25-08-31 10:46:40 | D | - best error = [ 4614.4321, 4614.4321] +25-08-31 10:46:40 | D | + Adding low-rank branches to single_transformer_blocks.1.proj_out.linears.0 +25-08-31 10:46:40 | D | - Calibrating low-rank branch for single_transformer_blocks.1.proj_mlp +25-08-31 10:46:40 | D | + w: sfp4_e2m1_all +25-08-31 10:46:40 | D | + x: sfp4_e2m1_all +25-08-31 10:46:40 | D | + y: None +25-08-31 10:46:40 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:46:40 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:46:40 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:46:42 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:47:03 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:47:03 | D | - error = [ 5728.1013, 5674.2281, 5652.3860, 5621.7447, 5600.1277, 5597.4382, 5578.0541, 5567.0297, 5555.9550, 5548.8604] +25-08-31 10:47:03 | D | - best error = [ 5728.1013, 5674.2281, 5652.3860, 5621.7447, 5600.1277, 5597.4382, 5578.0541, 5567.0297, 5555.9550, 5548.8604] +25-08-31 10:47:24 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 10:47:24 | D | - error = [ 5542.0812, 5536.9233, 5530.7744, 5526.4792, 5522.7147, 5518.2109, 5514.8938, 5511.8200, 5509.0569, 5505.5634] +25-08-31 10:47:24 | D | - best error = [ 5542.0812, 5536.9233, 5530.7744, 5526.4792, 5522.7147, 5518.2109, 5514.8938, 5511.8200, 5509.0569, 5505.5634] +25-08-31 10:47:46 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 10:47:46 | D | - error = [ 5501.8697, 5499.9400, 5498.3883, 5497.3541, 5496.1184, 5494.6379, 5493.5992, 5490.8797, 5490.0665, 5489.1340] +25-08-31 10:47:46 | D | - best error = [ 5501.8697, 5499.9400, 5498.3883, 5497.3541, 5496.1184, 5494.6379, 5493.5992, 5490.8797, 5490.0665, 5489.1340] +25-08-31 10:47:53 | D | - iter = [ 30, 31, 32] +25-08-31 10:47:53 | D | - error = [ 5488.8842, 5487.1873, 5487.5683] +25-08-31 10:47:53 | D | - best error = [ 5488.8842, 5487.1873, 5487.1873] +25-08-31 10:47:53 | D | + Adding low-rank branches to single_transformer_blocks.1.proj_mlp +25-08-31 10:47:53 | D | - Calibrating low-rank branch for single_transformer_blocks.1.proj_out.linears.1 +25-08-31 10:47:53 | D | + w: sfp4_e2m1_all +25-08-31 10:47:53 | D | + x: sfp4_e2m1_all +25-08-31 10:47:53 | D | + y: None +25-08-31 10:47:53 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:47:53 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:47:53 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:47:58 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:48:30 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:48:30 | D | - error = [ 6556.3133, 6529.0051, 6493.5636, 6469.2630, 6454.4614, 6440.8965, 6432.3803, 6422.3021, 6412.2820, 6404.3610] +25-08-31 10:48:30 | D | - best error = [ 6556.3133, 6529.0051, 6493.5636, 6469.2630, 6454.4614, 6440.8965, 6432.3803, 6422.3021, 6412.2820, 6404.3610] +25-08-31 10:49:02 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 10:49:02 | D | - error = [ 6398.0663, 6392.8957, 6386.9761, 6381.3202, 6378.7184, 6375.7365, 6373.2278, 6371.0480, 6369.6396, 6367.3388] +25-08-31 10:49:02 | D | - best error = [ 6398.0663, 6392.8957, 6386.9761, 6381.3202, 6378.7184, 6375.7365, 6373.2278, 6371.0480, 6369.6396, 6367.3388] +25-08-31 10:49:12 | D | - iter = [ 20, 21, 22] +25-08-31 10:49:12 | D | - error = [ 6363.7819, 6361.0273, 6372.6541] +25-08-31 10:49:12 | D | - best error = [ 6363.7819, 6361.0273, 6361.0273] +25-08-31 10:49:12 | D | + Adding low-rank branches to single_transformer_blocks.1.proj_out.linears.1 +25-08-31 10:49:30 | D | - Calibrating low-rank branches of block single_transformer_blocks.2 +25-08-31 10:49:30 | D | - Calibrating low-rank branch for single_transformer_blocks.2.attn.to_q, single_transformer_blocks.2.attn.to_k, single_transformer_blocks.2.attn.to_v +25-08-31 10:49:30 | D | + w: sfp4_e2m1_all +25-08-31 10:49:30 | D | + x: sfp4_e2m1_all +25-08-31 10:49:30 | D | + y: None +25-08-31 10:49:30 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:49:30 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:49:30 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:49:31 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:49:46 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-31 10:49:46 | D | - error = [ 8998.0794, 8882.0394, 8748.0361, 8744.6807, 8638.9143, 8638.7022, 8704.3079] +25-08-31 10:49:46 | D | - best error = [ 8998.0794, 8882.0394, 8748.0361, 8744.6807, 8638.9143, 8638.7022, 8638.7022] +25-08-31 10:49:46 | D | + Adding low-rank branches to single_transformer_blocks.2.attn.to_q, single_transformer_blocks.2.attn.to_k, single_transformer_blocks.2.attn.to_v +25-08-31 10:49:47 | D | - Calibrating low-rank branch for single_transformer_blocks.2.proj_out.linears.0 +25-08-31 10:49:47 | D | + w: sfp4_e2m1_all +25-08-31 10:49:47 | D | + x: sfp4_e2m1_all +25-08-31 10:49:47 | D | + y: None +25-08-31 10:49:47 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:49:47 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:49:47 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:49:47 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:50:03 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:50:03 | D | - error = [ 5496.0489, 5442.8983, 5404.7834, 5379.8954, 5363.2531, 5347.3184, 5335.9665, 5323.6814, 5320.2756, 5311.2748] +25-08-31 10:50:03 | D | - best error = [ 5496.0489, 5442.8983, 5404.7834, 5379.8954, 5363.2531, 5347.3184, 5335.9665, 5323.6814, 5320.2756, 5311.2748] +25-08-31 10:50:17 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 10:50:17 | D | - error = [ 5298.9444, 5288.7590, 5286.0178, 5275.9488, 5270.7918, 5268.1500, 5262.9302, 5261.4287, 5257.5422, 5254.9280] +25-08-31 10:50:17 | D | - best error = [ 5298.9444, 5288.7590, 5286.0178, 5275.9488, 5270.7918, 5268.1500, 5262.9302, 5261.4287, 5257.5422, 5254.9280] +25-08-31 10:50:20 | D | - iter = [ 20, 21] +25-08-31 10:50:20 | D | - error = [ 5253.3285, 5266.5336] +25-08-31 10:50:20 | D | - best error = [ 5253.3285, 5253.3285] +25-08-31 10:50:21 | D | + Adding low-rank branches to single_transformer_blocks.2.proj_out.linears.0 +25-08-31 10:50:21 | D | - Calibrating low-rank branch for single_transformer_blocks.2.proj_mlp +25-08-31 10:50:21 | D | + w: sfp4_e2m1_all +25-08-31 10:50:21 | D | + x: sfp4_e2m1_all +25-08-31 10:50:21 | D | + y: None +25-08-31 10:50:21 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:50:21 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:50:21 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:50:23 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:50:44 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:50:44 | D | - error = [ 4776.4166, 4739.8987, 4718.8922, 4701.6057, 4687.5081, 4676.6663, 4668.5797, 4661.8029, 4656.5225, 4650.9715] +25-08-31 10:50:44 | D | - best error = [ 4776.4166, 4739.8987, 4718.8922, 4701.6057, 4687.5081, 4676.6663, 4668.5797, 4661.8029, 4656.5225, 4650.9715] +25-08-31 10:51:06 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 10:51:06 | D | - error = [ 4646.5017, 4642.0306, 4638.8009, 4635.4937, 4633.8146, 4630.9485, 4627.5711, 4626.8990, 4624.2855, 4622.5062] +25-08-31 10:51:06 | D | - best error = [ 4646.5017, 4642.0306, 4638.8009, 4635.4937, 4633.8146, 4630.9485, 4627.5711, 4626.8990, 4624.2855, 4622.5062] +25-08-31 10:51:27 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 10:51:27 | D | - error = [ 4620.8229, 4619.7838, 4618.7875, 4618.5086, 4616.8102, 4616.3013, 4615.6196, 4613.4662, 4612.5099, 4611.6147] +25-08-31 10:51:27 | D | - best error = [ 4620.8229, 4619.7838, 4618.7875, 4618.5086, 4616.8102, 4616.3013, 4615.6196, 4613.4662, 4612.5099, 4611.6147] +25-08-31 10:51:29 | D | - iter = [ 30] +25-08-31 10:51:29 | D | - error = [ 4622.4468] +25-08-31 10:51:29 | D | - best error = [ 4611.6147] +25-08-31 10:51:29 | D | + Adding low-rank branches to single_transformer_blocks.2.proj_mlp +25-08-31 10:51:30 | D | - Calibrating low-rank branch for single_transformer_blocks.2.proj_out.linears.1 +25-08-31 10:51:30 | D | + w: sfp4_e2m1_all +25-08-31 10:51:30 | D | + x: sfp4_e2m1_all +25-08-31 10:51:30 | D | + y: None +25-08-31 10:51:30 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:51:30 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:51:30 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:51:34 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:51:53 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-31 10:51:53 | D | - error = [ 6845.9319, 6802.2214, 6775.0613, 6752.3667, 6736.1853, 6739.9549] +25-08-31 10:51:53 | D | - best error = [ 6845.9319, 6802.2214, 6775.0613, 6752.3667, 6736.1853, 6736.1853] +25-08-31 10:51:53 | D | + Adding low-rank branches to single_transformer_blocks.2.proj_out.linears.1 +25-08-31 10:52:11 | D | - Calibrating low-rank branches of block single_transformer_blocks.3 +25-08-31 10:52:11 | D | - Calibrating low-rank branch for single_transformer_blocks.3.attn.to_q, single_transformer_blocks.3.attn.to_k, single_transformer_blocks.3.attn.to_v +25-08-31 10:52:11 | D | + w: sfp4_e2m1_all +25-08-31 10:52:11 | D | + x: sfp4_e2m1_all +25-08-31 10:52:11 | D | + y: None +25-08-31 10:52:11 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:52:11 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:52:11 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:52:13 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:52:34 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:52:34 | D | - error = [ 9976.5922, 9765.7950, 9680.9628, 9605.2481, 9554.1798, 9458.3572, 9449.6823, 9411.0356, 9385.8097, 9390.5778] +25-08-31 10:52:34 | D | - best error = [ 9976.5922, 9765.7950, 9680.9628, 9605.2481, 9554.1798, 9458.3572, 9449.6823, 9411.0356, 9385.8097, 9385.8097] +25-08-31 10:52:35 | D | + Adding low-rank branches to single_transformer_blocks.3.attn.to_q, single_transformer_blocks.3.attn.to_k, single_transformer_blocks.3.attn.to_v +25-08-31 10:52:35 | D | - Calibrating low-rank branch for single_transformer_blocks.3.proj_out.linears.0 +25-08-31 10:52:35 | D | + w: sfp4_e2m1_all +25-08-31 10:52:35 | D | + x: sfp4_e2m1_all +25-08-31 10:52:35 | D | + y: None +25-08-31 10:52:35 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:52:35 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:52:35 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:52:36 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:52:51 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:52:51 | D | - error = [ 4500.8146, 4461.2702, 4431.9623, 4410.5773, 4392.6639, 4379.1054, 4368.5870, 4359.1747, 4355.3142, 4348.8479] +25-08-31 10:52:51 | D | - best error = [ 4500.8146, 4461.2702, 4431.9623, 4410.5773, 4392.6639, 4379.1054, 4368.5870, 4359.1747, 4355.3142, 4348.8479] +25-08-31 10:53:01 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16] +25-08-31 10:53:01 | D | - error = [ 4345.7805, 4338.5117, 4334.6642, 4329.6383, 4323.5726, 4319.9461, 4326.4410] +25-08-31 10:53:01 | D | - best error = [ 4345.7805, 4338.5117, 4334.6642, 4329.6383, 4323.5726, 4319.9461, 4319.9461] +25-08-31 10:53:01 | D | + Adding low-rank branches to single_transformer_blocks.3.proj_out.linears.0 +25-08-31 10:53:01 | D | - Calibrating low-rank branch for single_transformer_blocks.3.proj_mlp +25-08-31 10:53:01 | D | + w: sfp4_e2m1_all +25-08-31 10:53:01 | D | + x: sfp4_e2m1_all +25-08-31 10:53:01 | D | + y: None +25-08-31 10:53:01 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:53:01 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:53:01 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:53:03 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:53:16 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-31 10:53:16 | D | - error = [ 5528.0519, 5479.3445, 5450.3866, 5419.3313, 5399.0042, 5399.0525] +25-08-31 10:53:16 | D | - best error = [ 5528.0519, 5479.3445, 5450.3866, 5419.3313, 5399.0042, 5399.0042] +25-08-31 10:53:16 | D | + Adding low-rank branches to single_transformer_blocks.3.proj_mlp +25-08-31 10:53:16 | D | - Calibrating low-rank branch for single_transformer_blocks.3.proj_out.linears.1 +25-08-31 10:53:16 | D | + w: sfp4_e2m1_all +25-08-31 10:53:16 | D | + x: sfp4_e2m1_all +25-08-31 10:53:16 | D | + y: None +25-08-31 10:53:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:53:17 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:53:17 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:53:21 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:53:53 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:53:53 | D | - error = [ 6524.2005, 6487.7944, 6459.3622, 6439.2969, 6421.6291, 6406.4967, 6397.7627, 6387.0685, 6380.9601, 6372.3001] +25-08-31 10:53:53 | D | - best error = [ 6524.2005, 6487.7944, 6459.3622, 6439.2969, 6421.6291, 6406.4967, 6397.7627, 6387.0685, 6380.9601, 6372.3001] +25-08-31 10:54:25 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 10:54:25 | D | - error = [ 6367.3976, 6361.4747, 6353.4762, 6349.1710, 6344.9120, 6341.7673, 6337.8649, 6333.0855, 6331.0773, 6329.6041] +25-08-31 10:54:25 | D | - best error = [ 6367.3976, 6361.4747, 6353.4762, 6349.1710, 6344.9120, 6341.7673, 6337.8649, 6333.0855, 6331.0773, 6329.6041] +25-08-31 10:54:57 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 10:54:57 | D | - error = [ 6327.0480, 6324.7210, 6321.7352, 6318.3365, 6317.4352, 6316.4224, 6314.6560, 6312.5995, 6311.4094, 6309.3924] +25-08-31 10:54:57 | D | - best error = [ 6327.0480, 6324.7210, 6321.7352, 6318.3365, 6317.4352, 6316.4224, 6314.6560, 6312.5995, 6311.4094, 6309.3924] +25-08-31 10:55:16 | D | - iter = [ 30, 31, 32, 33, 34, 35] +25-08-31 10:55:16 | D | - error = [ 6307.4179, 6305.5750, 6303.6447, 6301.9927, 6301.6160, 6301.6585] +25-08-31 10:55:16 | D | - best error = [ 6307.4179, 6305.5750, 6303.6447, 6301.9927, 6301.6160, 6301.6160] +25-08-31 10:55:16 | D | + Adding low-rank branches to single_transformer_blocks.3.proj_out.linears.1 +25-08-31 10:55:34 | D | - Calibrating low-rank branches of block single_transformer_blocks.4 +25-08-31 10:55:34 | D | - Calibrating low-rank branch for single_transformer_blocks.4.attn.to_q, single_transformer_blocks.4.attn.to_k, single_transformer_blocks.4.attn.to_v +25-08-31 10:55:34 | D | + w: sfp4_e2m1_all +25-08-31 10:55:34 | D | + x: sfp4_e2m1_all +25-08-31 10:55:34 | D | + y: None +25-08-31 10:55:34 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:55:34 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:55:34 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:55:36 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:55:49 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-31 10:55:49 | D | - error = [10471.2857, 10364.2941, 10267.0779, 10127.8890, 10003.9000, 10038.5186] +25-08-31 10:55:49 | D | - best error = [10471.2857, 10364.2941, 10267.0779, 10127.8890, 10003.9000, 10003.9000] +25-08-31 10:55:49 | D | + Adding low-rank branches to single_transformer_blocks.4.attn.to_q, single_transformer_blocks.4.attn.to_k, single_transformer_blocks.4.attn.to_v +25-08-31 10:55:49 | D | - Calibrating low-rank branch for single_transformer_blocks.4.proj_out.linears.0 +25-08-31 10:55:49 | D | + w: sfp4_e2m1_all +25-08-31 10:55:49 | D | + x: sfp4_e2m1_all +25-08-31 10:55:49 | D | + y: None +25-08-31 10:55:49 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:55:49 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:55:49 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:55:50 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:56:05 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:56:05 | D | - error = [ 6118.1546, 6064.4258, 6029.0656, 6002.0270, 5975.9180, 5949.2568, 5935.3723, 5933.6296, 5913.0967, 5902.7173] +25-08-31 10:56:05 | D | - best error = [ 6118.1546, 6064.4258, 6029.0656, 6002.0270, 5975.9180, 5949.2568, 5935.3723, 5933.6296, 5913.0967, 5902.7173] +25-08-31 10:56:06 | D | - iter = [ 10] +25-08-31 10:56:06 | D | - error = [ 5909.5022] +25-08-31 10:56:06 | D | - best error = [ 5902.7173] +25-08-31 10:56:07 | D | + Adding low-rank branches to single_transformer_blocks.4.proj_out.linears.0 +25-08-31 10:56:07 | D | - Calibrating low-rank branch for single_transformer_blocks.4.proj_mlp +25-08-31 10:56:07 | D | + w: sfp4_e2m1_all +25-08-31 10:56:07 | D | + x: sfp4_e2m1_all +25-08-31 10:56:07 | D | + y: None +25-08-31 10:56:07 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:56:07 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:56:07 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:56:08 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:56:29 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:56:29 | D | - error = [ 4986.3042, 4947.7027, 4916.4068, 4903.6325, 4882.7834, 4870.3936, 4859.1746, 4849.4255, 4843.0224, 4837.5347] +25-08-31 10:56:29 | D | - best error = [ 4986.3042, 4947.7027, 4916.4068, 4903.6325, 4882.7834, 4870.3936, 4859.1746, 4849.4255, 4843.0224, 4837.5347] +25-08-31 10:56:44 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16] +25-08-31 10:56:44 | D | - error = [ 4831.9810, 4827.8674, 4824.1063, 4822.2540, 4818.5604, 4814.7183, 4825.0490] +25-08-31 10:56:44 | D | - best error = [ 4831.9810, 4827.8674, 4824.1063, 4822.2540, 4818.5604, 4814.7183, 4814.7183] +25-08-31 10:56:45 | D | + Adding low-rank branches to single_transformer_blocks.4.proj_mlp +25-08-31 10:56:45 | D | - Calibrating low-rank branch for single_transformer_blocks.4.proj_out.linears.1 +25-08-31 10:56:45 | D | + w: sfp4_e2m1_all +25-08-31 10:56:45 | D | + x: sfp4_e2m1_all +25-08-31 10:56:45 | D | + y: None +25-08-31 10:56:45 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:56:45 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:56:45 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:56:50 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:57:21 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:57:21 | D | - error = [ 4512.2402, 4488.6301, 4484.2469, 4466.3506, 4454.2640, 4443.2049, 4435.7516, 4426.9669, 4423.5678, 4418.5443] +25-08-31 10:57:21 | D | - best error = [ 4512.2402, 4488.6301, 4484.2469, 4466.3506, 4454.2640, 4443.2049, 4435.7516, 4426.9669, 4423.5678, 4418.5443] +25-08-31 10:57:34 | D | - iter = [ 10, 11, 12, 13] +25-08-31 10:57:34 | D | - error = [ 4413.9597, 4410.6823, 4407.3067, 4408.9954] +25-08-31 10:57:34 | D | - best error = [ 4413.9597, 4410.6823, 4407.3067, 4407.3067] +25-08-31 10:57:34 | D | + Adding low-rank branches to single_transformer_blocks.4.proj_out.linears.1 +25-08-31 10:57:52 | D | - Calibrating low-rank branches of block single_transformer_blocks.5 +25-08-31 10:57:52 | D | - Calibrating low-rank branch for single_transformer_blocks.5.attn.to_q, single_transformer_blocks.5.attn.to_k, single_transformer_blocks.5.attn.to_v +25-08-31 10:57:52 | D | + w: sfp4_e2m1_all +25-08-31 10:57:52 | D | + x: sfp4_e2m1_all +25-08-31 10:57:52 | D | + y: None +25-08-31 10:57:52 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:57:52 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:57:52 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:57:54 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:58:00 | D | - iter = [ 0, 1, 2] +25-08-31 10:58:00 | D | - error = [11140.4140, 10967.8979, 11019.2824] +25-08-31 10:58:00 | D | - best error = [11140.4140, 10967.8979, 10967.8979] +25-08-31 10:58:00 | D | + Adding low-rank branches to single_transformer_blocks.5.attn.to_q, single_transformer_blocks.5.attn.to_k, single_transformer_blocks.5.attn.to_v +25-08-31 10:58:00 | D | - Calibrating low-rank branch for single_transformer_blocks.5.proj_out.linears.0 +25-08-31 10:58:00 | D | + w: sfp4_e2m1_all +25-08-31 10:58:00 | D | + x: sfp4_e2m1_all +25-08-31 10:58:00 | D | + y: None +25-08-31 10:58:00 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:58:00 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 10:58:00 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 10:58:01 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:58:15 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 10:58:15 | D | - error = [ 4163.8302, 4126.9117, 4110.2095, 4089.0265, 4076.9251, 4059.6967, 4050.6139, 4041.4428, 4043.1922] +25-08-31 10:58:15 | D | - best error = [ 4163.8302, 4126.9117, 4110.2095, 4089.0265, 4076.9251, 4059.6967, 4050.6139, 4041.4428, 4041.4428] +25-08-31 10:58:15 | D | + Adding low-rank branches to single_transformer_blocks.5.proj_out.linears.0 +25-08-31 10:58:15 | D | - Calibrating low-rank branch for single_transformer_blocks.5.proj_mlp +25-08-31 10:58:15 | D | + w: sfp4_e2m1_all +25-08-31 10:58:15 | D | + x: sfp4_e2m1_all +25-08-31 10:58:15 | D | + y: None +25-08-31 10:58:15 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:58:15 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:58:15 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:58:16 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:58:37 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:58:37 | D | - error = [ 5185.7902, 5135.5156, 5103.3812, 5081.4506, 5063.9380, 5049.8735, 5038.3610, 5029.0615, 5021.7065, 5015.1036] +25-08-31 10:58:37 | D | - best error = [ 5185.7902, 5135.5156, 5103.3812, 5081.4506, 5063.9380, 5049.8735, 5038.3610, 5029.0615, 5021.7065, 5015.1036] +25-08-31 10:58:58 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 10:58:58 | D | - error = [ 5009.9261, 5004.7595, 4999.0979, 4994.7827, 4992.4465, 4989.4060, 4985.7864, 4983.4781, 4981.1555, 4978.7217] +25-08-31 10:58:58 | D | - best error = [ 5009.9261, 5004.7595, 4999.0979, 4994.7827, 4992.4465, 4989.4060, 4985.7864, 4983.4781, 4981.1555, 4978.7217] +25-08-31 10:59:16 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27] +25-08-31 10:59:16 | D | - error = [ 4976.6217, 4974.7036, 4972.5959, 4971.6663, 4970.5171, 4968.1451, 4966.0673, 4982.5026] +25-08-31 10:59:16 | D | - best error = [ 4976.6217, 4974.7036, 4972.5959, 4971.6663, 4970.5171, 4968.1451, 4966.0673, 4966.0673] +25-08-31 10:59:16 | D | + Adding low-rank branches to single_transformer_blocks.5.proj_mlp +25-08-31 10:59:16 | D | - Calibrating low-rank branch for single_transformer_blocks.5.proj_out.linears.1 +25-08-31 10:59:16 | D | + w: sfp4_e2m1_all +25-08-31 10:59:16 | D | + x: sfp4_e2m1_all +25-08-31 10:59:16 | D | + y: None +25-08-31 10:59:16 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 10:59:16 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 10:59:16 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 10:59:21 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 10:59:52 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 10:59:52 | D | - error = [ 4690.3251, 4661.8108, 4641.7486, 4628.2748, 4625.5545, 4613.3595, 4603.6249, 4595.7387, 4589.0110, 4585.7841] +25-08-31 10:59:52 | D | - best error = [ 4690.3251, 4661.8108, 4641.7486, 4628.2748, 4625.5545, 4613.3595, 4603.6249, 4595.7387, 4589.0110, 4585.7841] +25-08-31 11:00:21 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18] +25-08-31 11:00:21 | D | - error = [ 4580.1212, 4577.4687, 4574.1906, 4572.6060, 4569.9002, 4565.5685, 4563.1980, 4558.0160, 4558.2828] +25-08-31 11:00:21 | D | - best error = [ 4580.1212, 4577.4687, 4574.1906, 4572.6060, 4569.9002, 4565.5685, 4563.1980, 4558.0160, 4558.0160] +25-08-31 11:00:22 | D | + Adding low-rank branches to single_transformer_blocks.5.proj_out.linears.1 +25-08-31 11:00:39 | D | - Calibrating low-rank branches of block single_transformer_blocks.6 +25-08-31 11:00:39 | D | - Calibrating low-rank branch for single_transformer_blocks.6.attn.to_q, single_transformer_blocks.6.attn.to_k, single_transformer_blocks.6.attn.to_v +25-08-31 11:00:39 | D | + w: sfp4_e2m1_all +25-08-31 11:00:39 | D | + x: sfp4_e2m1_all +25-08-31 11:00:39 | D | + y: None +25-08-31 11:00:39 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:00:39 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 11:00:39 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 11:00:41 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:00:56 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-31 11:00:56 | D | - error = [12876.5940, 12519.6460, 12392.9780, 12392.4364, 12220.9316, 12098.5474, 12173.6897] +25-08-31 11:00:56 | D | - best error = [12876.5940, 12519.6460, 12392.9780, 12392.4364, 12220.9316, 12098.5474, 12098.5474] +25-08-31 11:00:56 | D | + Adding low-rank branches to single_transformer_blocks.6.attn.to_q, single_transformer_blocks.6.attn.to_k, single_transformer_blocks.6.attn.to_v +25-08-31 11:00:56 | D | - Calibrating low-rank branch for single_transformer_blocks.6.proj_out.linears.0 +25-08-31 11:00:56 | D | + w: sfp4_e2m1_all +25-08-31 11:00:56 | D | + x: sfp4_e2m1_all +25-08-31 11:00:56 | D | + y: None +25-08-31 11:00:56 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:00:56 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 11:00:56 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 11:00:57 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 11:01:12 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:01:12 | D | - error = [ 4053.7311, 4013.4226, 3989.5653, 3972.3128, 3956.7662, 3944.1916, 3933.8606, 3926.0777, 3921.9635, 3931.7707] +25-08-31 11:01:12 | D | - best error = [ 4053.7311, 4013.4226, 3989.5653, 3972.3128, 3956.7662, 3944.1916, 3933.8606, 3926.0777, 3921.9635, 3921.9635] +25-08-31 11:01:13 | D | + Adding low-rank branches to single_transformer_blocks.6.proj_out.linears.0 +25-08-31 11:01:13 | D | - Calibrating low-rank branch for single_transformer_blocks.6.proj_mlp +25-08-31 11:01:13 | D | + w: sfp4_e2m1_all +25-08-31 11:01:13 | D | + x: sfp4_e2m1_all +25-08-31 11:01:13 | D | + y: None +25-08-31 11:01:13 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:01:13 | D | + finished parsing calibration arguments, ram usage: 14.4 +25-08-31 11:01:13 | D | + finished resetting calibrator, ram usage: 14.4 +25-08-31 11:01:14 | D | + finished calculating the original outputs, ram usage: 14.4 +25-08-31 11:01:35 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:01:35 | D | - error = [ 5143.2927, 5096.6426, 5065.0917, 5040.2748, 5021.9078, 5007.1179, 4996.7720, 4986.9536, 4980.2090, 4972.8032] +25-08-31 11:01:35 | D | - best error = [ 5143.2927, 5096.6426, 5065.0917, 5040.2748, 5021.9078, 5007.1179, 4996.7720, 4986.9536, 4980.2090, 4972.8032] +25-08-31 11:01:43 | D | - iter = [ 10, 11, 12, 13] +25-08-31 11:01:43 | D | - error = [ 4967.5090, 4963.0656, 4957.7914, 4965.8433] +25-08-31 11:01:43 | D | - best error = [ 4967.5090, 4963.0656, 4957.7914, 4957.7914] +25-08-31 11:01:44 | D | + Adding low-rank branches to single_transformer_blocks.6.proj_mlp +25-08-31 11:01:44 | D | - Calibrating low-rank branch for single_transformer_blocks.6.proj_out.linears.1 +25-08-31 11:01:44 | D | + w: sfp4_e2m1_all +25-08-31 11:01:44 | D | + x: sfp4_e2m1_all +25-08-31 11:01:44 | D | + y: None +25-08-31 11:01:44 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:01:44 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:01:44 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:01:49 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:02:21 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:02:21 | D | - error = [ 6417.9956, 6382.5650, 6356.6966, 6335.4836, 6318.2710, 6303.2468, 6292.5670, 6284.0471, 6273.9429, 6266.5633] +25-08-31 11:02:21 | D | - best error = [ 6417.9956, 6382.5650, 6356.6966, 6335.4836, 6318.2710, 6303.2468, 6292.5670, 6284.0471, 6273.9429, 6266.5633] +25-08-31 11:02:53 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:02:53 | D | - error = [ 6262.7534, 6256.9865, 6250.2986, 6243.9379, 6241.5152, 6237.4870, 6235.4057, 6230.8003, 6229.5987, 6225.6688] +25-08-31 11:02:53 | D | - best error = [ 6262.7534, 6256.9865, 6250.2986, 6243.9379, 6241.5152, 6237.4870, 6235.4057, 6230.8003, 6229.5987, 6225.6688] +25-08-31 11:03:25 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 11:03:25 | D | - error = [ 6222.5098, 6221.1674, 6217.9546, 6216.7760, 6214.2308, 6212.8296, 6211.3879, 6208.5812, 6207.1385, 6204.8091] +25-08-31 11:03:25 | D | - best error = [ 6222.5098, 6221.1674, 6217.9546, 6216.7760, 6214.2308, 6212.8296, 6211.3879, 6208.5812, 6207.1385, 6204.8091] +25-08-31 11:03:57 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-31 11:03:57 | D | - error = [ 6203.9487, 6201.8201, 6200.6932, 6199.4690, 6198.7347, 6195.9935, 6195.7953, 6194.8466, 6194.2112, 6193.6325] +25-08-31 11:03:57 | D | - best error = [ 6203.9487, 6201.8201, 6200.6932, 6199.4690, 6198.7347, 6195.9935, 6195.7953, 6194.8466, 6194.2112, 6193.6325] +25-08-31 11:04:20 | D | - iter = [ 40, 41, 42, 43, 44, 45, 46] +25-08-31 11:04:20 | D | - error = [ 6192.6479, 6189.9683, 6187.2042, 6186.8092, 6184.9970, 6183.4566, 6183.9609] +25-08-31 11:04:20 | D | - best error = [ 6192.6479, 6189.9683, 6187.2042, 6186.8092, 6184.9970, 6183.4566, 6183.4566] +25-08-31 11:04:20 | D | + Adding low-rank branches to single_transformer_blocks.6.proj_out.linears.1 +25-08-31 11:04:38 | D | - Calibrating low-rank branches of block single_transformer_blocks.7 +25-08-31 11:04:38 | D | - Calibrating low-rank branch for single_transformer_blocks.7.attn.to_q, single_transformer_blocks.7.attn.to_k, single_transformer_blocks.7.attn.to_v +25-08-31 11:04:38 | D | + w: sfp4_e2m1_all +25-08-31 11:04:38 | D | + x: sfp4_e2m1_all +25-08-31 11:04:38 | D | + y: None +25-08-31 11:04:38 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:04:38 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:04:38 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:04:39 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:04:50 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-31 11:04:50 | D | - error = [13506.9748, 13325.6156, 13066.4621, 13064.3166, 13237.9527] +25-08-31 11:04:50 | D | - best error = [13506.9748, 13325.6156, 13066.4621, 13064.3166, 13064.3166] +25-08-31 11:04:50 | D | + Adding low-rank branches to single_transformer_blocks.7.attn.to_q, single_transformer_blocks.7.attn.to_k, single_transformer_blocks.7.attn.to_v +25-08-31 11:04:51 | D | - Calibrating low-rank branch for single_transformer_blocks.7.proj_out.linears.0 +25-08-31 11:04:51 | D | + w: sfp4_e2m1_all +25-08-31 11:04:51 | D | + x: sfp4_e2m1_all +25-08-31 11:04:51 | D | + y: None +25-08-31 11:04:51 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:04:51 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:04:51 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:04:51 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:05:02 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-31 11:05:02 | D | - error = [ 3238.6544, 3204.9719, 3187.8681, 3186.7858, 3174.4881, 3157.8062, 3161.3171] +25-08-31 11:05:02 | D | - best error = [ 3238.6544, 3204.9719, 3187.8681, 3186.7858, 3174.4881, 3157.8062, 3157.8062] +25-08-31 11:05:02 | D | + Adding low-rank branches to single_transformer_blocks.7.proj_out.linears.0 +25-08-31 11:05:03 | D | - Calibrating low-rank branch for single_transformer_blocks.7.proj_mlp +25-08-31 11:05:03 | D | + w: sfp4_e2m1_all +25-08-31 11:05:03 | D | + x: sfp4_e2m1_all +25-08-31 11:05:03 | D | + y: None +25-08-31 11:05:03 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:05:03 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:05:03 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:05:04 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:05:25 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:05:25 | D | - error = [ 4715.3358, 4681.1347, 4659.9092, 4643.4980, 4630.7619, 4622.2663, 4613.2143, 4605.9264, 4599.2330, 4594.4079] +25-08-31 11:05:25 | D | - best error = [ 4715.3358, 4681.1347, 4659.9092, 4643.4980, 4630.7619, 4622.2663, 4613.2143, 4605.9264, 4599.2330, 4594.4079] +25-08-31 11:05:46 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:05:46 | D | - error = [ 4589.9050, 4585.9529, 4584.0305, 4581.3461, 4579.3768, 4576.6767, 4573.8518, 4571.8808, 4569.8280, 4568.6860] +25-08-31 11:05:46 | D | - best error = [ 4589.9050, 4585.9529, 4584.0305, 4581.3461, 4579.3768, 4576.6767, 4573.8518, 4571.8808, 4569.8280, 4568.6860] +25-08-31 11:06:07 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 11:06:07 | D | - error = [ 4568.0858, 4566.9427, 4565.3471, 4564.2033, 4562.4062, 4561.9340, 4561.3296, 4560.7403, 4560.0171, 4559.6050] +25-08-31 11:06:07 | D | - best error = [ 4568.0858, 4566.9427, 4565.3471, 4564.2033, 4562.4062, 4561.9340, 4561.3296, 4560.7403, 4560.0171, 4559.6050] +25-08-31 11:06:18 | D | - iter = [ 30, 31, 32, 33, 34] +25-08-31 11:06:18 | D | - error = [ 4558.8266, 4557.2408, 4555.8062, 4555.4512, 4555.6971] +25-08-31 11:06:18 | D | - best error = [ 4558.8266, 4557.2408, 4555.8062, 4555.4512, 4555.4512] +25-08-31 11:06:18 | D | + Adding low-rank branches to single_transformer_blocks.7.proj_mlp +25-08-31 11:06:18 | D | - Calibrating low-rank branch for single_transformer_blocks.7.proj_out.linears.1 +25-08-31 11:06:18 | D | + w: sfp4_e2m1_all +25-08-31 11:06:18 | D | + x: sfp4_e2m1_all +25-08-31 11:06:18 | D | + y: None +25-08-31 11:06:18 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:06:18 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:06:18 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:06:23 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:06:43 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-31 11:06:43 | D | - error = [ 4993.2430, 4960.8721, 4941.0108, 4928.6509, 4915.7707, 4922.7285] +25-08-31 11:06:43 | D | - best error = [ 4993.2430, 4960.8721, 4941.0108, 4928.6509, 4915.7707, 4915.7707] +25-08-31 11:06:43 | D | + Adding low-rank branches to single_transformer_blocks.7.proj_out.linears.1 +25-08-31 11:07:01 | D | - Calibrating low-rank branches of block single_transformer_blocks.8 +25-08-31 11:07:01 | D | - Calibrating low-rank branch for single_transformer_blocks.8.attn.to_q, single_transformer_blocks.8.attn.to_k, single_transformer_blocks.8.attn.to_v +25-08-31 11:07:01 | D | + w: sfp4_e2m1_all +25-08-31 11:07:01 | D | + x: sfp4_e2m1_all +25-08-31 11:07:01 | D | + y: None +25-08-31 11:07:01 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:07:01 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:07:01 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:07:02 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:07:13 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-31 11:07:13 | D | - error = [15050.7653, 15033.5327, 14602.4157, 14538.6976, 14707.7313] +25-08-31 11:07:13 | D | - best error = [15050.7653, 15033.5327, 14602.4157, 14538.6976, 14538.6976] +25-08-31 11:07:13 | D | + Adding low-rank branches to single_transformer_blocks.8.attn.to_q, single_transformer_blocks.8.attn.to_k, single_transformer_blocks.8.attn.to_v +25-08-31 11:07:13 | D | - Calibrating low-rank branch for single_transformer_blocks.8.proj_out.linears.0 +25-08-31 11:07:13 | D | + w: sfp4_e2m1_all +25-08-31 11:07:13 | D | + x: sfp4_e2m1_all +25-08-31 11:07:13 | D | + y: None +25-08-31 11:07:13 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:07:13 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:07:13 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:07:14 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:07:29 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:07:29 | D | - error = [ 4356.5002, 4329.2237, 4294.4595, 4271.3689, 4252.5007, 4242.9087, 4234.2293, 4224.5407, 4219.9163, 4211.9103] +25-08-31 11:07:29 | D | - best error = [ 4356.5002, 4329.2237, 4294.4595, 4271.3689, 4252.5007, 4242.9087, 4234.2293, 4224.5407, 4219.9163, 4211.9103] +25-08-31 11:07:44 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:07:44 | D | - error = [ 4207.3488, 4200.3700, 4197.7089, 4194.9968, 4189.2059, 4185.9219, 4182.3866, 4181.5916, 4179.5414, 4178.9128] +25-08-31 11:07:44 | D | - best error = [ 4207.3488, 4200.3700, 4197.7089, 4194.9968, 4189.2059, 4185.9219, 4182.3866, 4181.5916, 4179.5414, 4178.9128] +25-08-31 11:07:59 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 11:07:59 | D | - error = [ 4177.5775, 4175.3292, 4172.1874, 4170.7817, 4169.4007, 4168.9862, 4166.3432, 4165.7981, 4165.6301, 4164.5259] +25-08-31 11:07:59 | D | - best error = [ 4177.5775, 4175.3292, 4172.1874, 4170.7817, 4169.4007, 4168.9862, 4166.3432, 4165.7981, 4165.6301, 4164.5259] +25-08-31 11:08:08 | D | - iter = [ 30, 31, 32, 33, 34, 35] +25-08-31 11:08:08 | D | - error = [ 4162.2922, 4160.7175, 4158.6317, 4157.1548, 4155.0369, 4155.1342] +25-08-31 11:08:08 | D | - best error = [ 4162.2922, 4160.7175, 4158.6317, 4157.1548, 4155.0369, 4155.0369] +25-08-31 11:08:08 | D | + Adding low-rank branches to single_transformer_blocks.8.proj_out.linears.0 +25-08-31 11:08:08 | D | - Calibrating low-rank branch for single_transformer_blocks.8.proj_mlp +25-08-31 11:08:08 | D | + w: sfp4_e2m1_all +25-08-31 11:08:08 | D | + x: sfp4_e2m1_all +25-08-31 11:08:08 | D | + y: None +25-08-31 11:08:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:08:08 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:08:08 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:08:10 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:08:31 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:08:31 | D | - error = [ 4712.8312, 4675.9805, 4662.2211, 4635.1203, 4621.7984, 4604.4092, 4593.5627, 4583.3315, 4577.7020, 4570.7476] +25-08-31 11:08:31 | D | - best error = [ 4712.8312, 4675.9805, 4662.2211, 4635.1203, 4621.7984, 4604.4092, 4593.5627, 4583.3315, 4577.7020, 4570.7476] +25-08-31 11:08:38 | D | - iter = [ 10, 11, 12] +25-08-31 11:08:38 | D | - error = [ 4566.9329, 4561.7363, 4584.4836] +25-08-31 11:08:38 | D | - best error = [ 4566.9329, 4561.7363, 4561.7363] +25-08-31 11:08:38 | D | + Adding low-rank branches to single_transformer_blocks.8.proj_mlp +25-08-31 11:08:38 | D | - Calibrating low-rank branch for single_transformer_blocks.8.proj_out.linears.1 +25-08-31 11:08:38 | D | + w: sfp4_e2m1_all +25-08-31 11:08:38 | D | + x: sfp4_e2m1_all +25-08-31 11:08:38 | D | + y: None +25-08-31 11:08:38 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:08:38 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:08:38 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:08:43 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:09:14 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:09:14 | D | - error = [ 6283.1839, 6247.7330, 6222.5233, 6200.1910, 6184.5284, 6175.6483, 6160.8921, 6151.1079, 6142.6887, 6133.5269] +25-08-31 11:09:14 | D | - best error = [ 6283.1839, 6247.7330, 6222.5233, 6200.1910, 6184.5284, 6175.6483, 6160.8921, 6151.1079, 6142.6887, 6133.5269] +25-08-31 11:09:47 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:09:47 | D | - error = [ 6126.5710, 6117.8617, 6112.8282, 6108.0917, 6105.5824, 6100.4449, 6098.8461, 6094.7259, 6091.8010, 6090.1015] +25-08-31 11:09:47 | D | - best error = [ 6126.5710, 6117.8617, 6112.8282, 6108.0917, 6105.5824, 6100.4449, 6098.8461, 6094.7259, 6091.8010, 6090.1015] +25-08-31 11:10:18 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 11:10:18 | D | - error = [ 6085.8669, 6084.4198, 6082.3246, 6081.5172, 6077.3132, 6075.7307, 6072.5507, 6072.5020, 6072.0211, 6072.7924] +25-08-31 11:10:18 | D | - best error = [ 6085.8669, 6084.4198, 6082.3246, 6081.5172, 6077.3132, 6075.7307, 6072.5507, 6072.5020, 6072.0211, 6072.0211] +25-08-31 11:10:19 | D | + Adding low-rank branches to single_transformer_blocks.8.proj_out.linears.1 +25-08-31 11:10:37 | D | - Calibrating low-rank branches of block single_transformer_blocks.9 +25-08-31 11:10:37 | D | - Calibrating low-rank branch for single_transformer_blocks.9.attn.to_q, single_transformer_blocks.9.attn.to_k, single_transformer_blocks.9.attn.to_v +25-08-31 11:10:37 | D | + w: sfp4_e2m1_all +25-08-31 11:10:37 | D | + x: sfp4_e2m1_all +25-08-31 11:10:37 | D | + y: None +25-08-31 11:10:37 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:10:37 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:10:37 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:10:38 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:11:00 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:11:00 | D | - error = [17326.6657, 16935.6684, 16819.1063, 16720.1120, 16593.0935, 16570.6063, 16461.5120, 16356.2216, 16337.3991, 16357.2741] +25-08-31 11:11:00 | D | - best error = [17326.6657, 16935.6684, 16819.1063, 16720.1120, 16593.0935, 16570.6063, 16461.5120, 16356.2216, 16337.3991, 16337.3991] +25-08-31 11:11:00 | D | + Adding low-rank branches to single_transformer_blocks.9.attn.to_q, single_transformer_blocks.9.attn.to_k, single_transformer_blocks.9.attn.to_v +25-08-31 11:11:00 | D | - Calibrating low-rank branch for single_transformer_blocks.9.proj_out.linears.0 +25-08-31 11:11:00 | D | + w: sfp4_e2m1_all +25-08-31 11:11:00 | D | + x: sfp4_e2m1_all +25-08-31 11:11:00 | D | + y: None +25-08-31 11:11:00 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:11:00 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:11:00 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:11:02 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:11:16 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:11:16 | D | - error = [ 4253.6055, 4224.4815, 4197.9033, 4175.6710, 4158.6753, 4145.8015, 4134.2259, 4127.4966, 4119.6954, 4115.1027] +25-08-31 11:11:16 | D | - best error = [ 4253.6055, 4224.4815, 4197.9033, 4175.6710, 4158.6753, 4145.8015, 4134.2259, 4127.4966, 4119.6954, 4115.1027] +25-08-31 11:11:22 | D | - iter = [ 10, 11, 12, 13] +25-08-31 11:11:22 | D | - error = [ 4110.3727, 4107.7561, 4103.0718, 4114.5075] +25-08-31 11:11:22 | D | - best error = [ 4110.3727, 4107.7561, 4103.0718, 4103.0718] +25-08-31 11:11:22 | D | + Adding low-rank branches to single_transformer_blocks.9.proj_out.linears.0 +25-08-31 11:11:23 | D | - Calibrating low-rank branch for single_transformer_blocks.9.proj_mlp +25-08-31 11:11:23 | D | + w: sfp4_e2m1_all +25-08-31 11:11:23 | D | + x: sfp4_e2m1_all +25-08-31 11:11:23 | D | + y: None +25-08-31 11:11:23 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:11:23 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:11:23 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:11:24 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:11:46 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:11:46 | D | - error = [ 4718.7346, 4679.9947, 4654.7000, 4636.7398, 4623.0929, 4613.0090, 4612.7250, 4600.3222, 4592.2663, 4585.4282] +25-08-31 11:11:46 | D | - best error = [ 4718.7346, 4679.9947, 4654.7000, 4636.7398, 4623.0929, 4613.0090, 4612.7250, 4600.3222, 4592.2663, 4585.4282] +25-08-31 11:12:07 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:12:07 | D | - error = [ 4580.2865, 4575.3701, 4571.5582, 4567.9461, 4564.1288, 4561.5127, 4560.0309, 4558.7823, 4556.0263, 4555.1799] +25-08-31 11:12:07 | D | - best error = [ 4580.2865, 4575.3701, 4571.5582, 4567.9461, 4564.1288, 4561.5127, 4560.0309, 4558.7823, 4556.0263, 4555.1799] +25-08-31 11:12:26 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28] +25-08-31 11:12:26 | D | - error = [ 4552.1145, 4549.4681, 4548.1706, 4547.6627, 4545.8176, 4545.3229, 4543.8539, 4543.2354, 4552.0591] +25-08-31 11:12:26 | D | - best error = [ 4552.1145, 4549.4681, 4548.1706, 4547.6627, 4545.8176, 4545.3229, 4543.8539, 4543.2354, 4543.2354] +25-08-31 11:12:27 | D | + Adding low-rank branches to single_transformer_blocks.9.proj_mlp +25-08-31 11:12:27 | D | - Calibrating low-rank branch for single_transformer_blocks.9.proj_out.linears.1 +25-08-31 11:12:27 | D | + w: sfp4_e2m1_all +25-08-31 11:12:27 | D | + x: sfp4_e2m1_all +25-08-31 11:12:27 | D | + y: None +25-08-31 11:12:27 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:12:27 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:12:27 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:12:32 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:12:54 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-31 11:12:54 | D | - error = [ 6062.1142, 6032.4350, 6001.7957, 5980.7143, 5964.3145, 5945.5314, 5946.1840] +25-08-31 11:12:54 | D | - best error = [ 6062.1142, 6032.4350, 6001.7957, 5980.7143, 5964.3145, 5945.5314, 5945.5314] +25-08-31 11:12:54 | D | + Adding low-rank branches to single_transformer_blocks.9.proj_out.linears.1 +25-08-31 11:13:12 | D | - Calibrating low-rank branches of block single_transformer_blocks.10 +25-08-31 11:13:12 | D | - Calibrating low-rank branch for single_transformer_blocks.10.attn.to_q, single_transformer_blocks.10.attn.to_k, single_transformer_blocks.10.attn.to_v +25-08-31 11:13:12 | D | + w: sfp4_e2m1_all +25-08-31 11:13:12 | D | + x: sfp4_e2m1_all +25-08-31 11:13:12 | D | + y: None +25-08-31 11:13:12 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:13:12 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:13:12 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:13:13 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:13:26 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-31 11:13:26 | D | - error = [18977.6138, 18677.1612, 18593.5068, 18438.7913, 18236.7129, 18316.8399] +25-08-31 11:13:26 | D | - best error = [18977.6138, 18677.1612, 18593.5068, 18438.7913, 18236.7129, 18236.7129] +25-08-31 11:13:26 | D | + Adding low-rank branches to single_transformer_blocks.10.attn.to_q, single_transformer_blocks.10.attn.to_k, single_transformer_blocks.10.attn.to_v +25-08-31 11:13:27 | D | - Calibrating low-rank branch for single_transformer_blocks.10.proj_out.linears.0 +25-08-31 11:13:27 | D | + w: sfp4_e2m1_all +25-08-31 11:13:27 | D | + x: sfp4_e2m1_all +25-08-31 11:13:27 | D | + y: None +25-08-31 11:13:27 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:13:27 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:13:27 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:13:28 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:13:43 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:13:43 | D | - error = [ 3281.1094, 3248.4970, 3228.2649, 3224.8786, 3211.9489, 3203.8165, 3191.8581, 3184.3123, 3182.7846, 3176.9241] +25-08-31 11:13:43 | D | - best error = [ 3281.1094, 3248.4970, 3228.2649, 3224.8786, 3211.9489, 3203.8165, 3191.8581, 3184.3123, 3182.7846, 3176.9241] +25-08-31 11:13:54 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16] +25-08-31 11:13:54 | D | - error = [ 3172.5353, 3167.9866, 3163.2894, 3162.2064, 3158.8795, 3155.7237, 3162.6370] +25-08-31 11:13:54 | D | - best error = [ 3172.5353, 3167.9866, 3163.2894, 3162.2064, 3158.8795, 3155.7237, 3155.7237] +25-08-31 11:13:54 | D | + Adding low-rank branches to single_transformer_blocks.10.proj_out.linears.0 +25-08-31 11:13:54 | D | - Calibrating low-rank branch for single_transformer_blocks.10.proj_mlp +25-08-31 11:13:54 | D | + w: sfp4_e2m1_all +25-08-31 11:13:54 | D | + x: sfp4_e2m1_all +25-08-31 11:13:54 | D | + y: None +25-08-31 11:13:54 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:13:54 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:13:54 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:13:56 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:14:09 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-31 11:14:09 | D | - error = [ 4820.3884, 4778.7189, 4751.2255, 4730.7004, 4715.6287, 4717.3817] +25-08-31 11:14:09 | D | - best error = [ 4820.3884, 4778.7189, 4751.2255, 4730.7004, 4715.6287, 4715.6287] +25-08-31 11:14:09 | D | + Adding low-rank branches to single_transformer_blocks.10.proj_mlp +25-08-31 11:14:09 | D | - Calibrating low-rank branch for single_transformer_blocks.10.proj_out.linears.1 +25-08-31 11:14:09 | D | + w: sfp4_e2m1_all +25-08-31 11:14:09 | D | + x: sfp4_e2m1_all +25-08-31 11:14:09 | D | + y: None +25-08-31 11:14:09 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:14:09 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:14:09 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:14:14 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:14:46 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:14:46 | D | - error = [ 6576.7475, 6537.4285, 6513.3279, 6491.4125, 6474.1084, 6457.1207, 6446.7337, 6439.4478, 6433.8592, 6426.3778] +25-08-31 11:14:46 | D | - best error = [ 6576.7475, 6537.4285, 6513.3279, 6491.4125, 6474.1084, 6457.1207, 6446.7337, 6439.4478, 6433.8592, 6426.3778] +25-08-31 11:15:08 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16] +25-08-31 11:15:08 | D | - error = [ 6417.9966, 6406.8719, 6399.7266, 6391.0957, 6384.4730, 6384.2989, 6384.7606] +25-08-31 11:15:08 | D | - best error = [ 6417.9966, 6406.8719, 6399.7266, 6391.0957, 6384.4730, 6384.2989, 6384.2989] +25-08-31 11:15:08 | D | + Adding low-rank branches to single_transformer_blocks.10.proj_out.linears.1 +25-08-31 11:15:26 | D | - Calibrating low-rank branches of block single_transformer_blocks.11 +25-08-31 11:15:26 | D | - Calibrating low-rank branch for single_transformer_blocks.11.attn.to_q, single_transformer_blocks.11.attn.to_k, single_transformer_blocks.11.attn.to_v +25-08-31 11:15:26 | D | + w: sfp4_e2m1_all +25-08-31 11:15:26 | D | + x: sfp4_e2m1_all +25-08-31 11:15:26 | D | + y: None +25-08-31 11:15:26 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:15:26 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:15:26 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:15:28 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:15:45 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 11:15:45 | D | - error = [20387.6425, 19878.7831, 19765.0018, 19628.7332, 19609.2378, 19397.9125, 19209.6526, 19346.0396] +25-08-31 11:15:45 | D | - best error = [20387.6425, 19878.7831, 19765.0018, 19628.7332, 19609.2378, 19397.9125, 19209.6526, 19209.6526] +25-08-31 11:15:45 | D | + Adding low-rank branches to single_transformer_blocks.11.attn.to_q, single_transformer_blocks.11.attn.to_k, single_transformer_blocks.11.attn.to_v +25-08-31 11:15:45 | D | - Calibrating low-rank branch for single_transformer_blocks.11.proj_out.linears.0 +25-08-31 11:15:45 | D | + w: sfp4_e2m1_all +25-08-31 11:15:45 | D | + x: sfp4_e2m1_all +25-08-31 11:15:45 | D | + y: None +25-08-31 11:15:45 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:15:45 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:15:45 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:15:46 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:16:02 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:16:02 | D | - error = [ 3585.8030, 3554.3098, 3528.0539, 3520.6288, 3503.1867, 3499.2121, 3487.5318, 3479.8533, 3471.2632, 3465.5438] +25-08-31 11:16:02 | D | - best error = [ 3585.8030, 3554.3098, 3528.0539, 3520.6288, 3503.1867, 3499.2121, 3487.5318, 3479.8533, 3471.2632, 3465.5438] +25-08-31 11:16:16 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:16:16 | D | - error = [ 3462.1493, 3457.6577, 3454.5353, 3451.0471, 3449.4276, 3447.9085, 3442.4760, 3438.3103, 3437.1613, 3435.6187] +25-08-31 11:16:16 | D | - best error = [ 3462.1493, 3457.6577, 3454.5353, 3451.0471, 3449.4276, 3447.9085, 3442.4760, 3438.3103, 3437.1613, 3435.6187] +25-08-31 11:16:20 | D | - iter = [ 20, 21] +25-08-31 11:16:20 | D | - error = [ 3434.9022, 3440.8400] +25-08-31 11:16:20 | D | - best error = [ 3434.9022, 3434.9022] +25-08-31 11:16:20 | D | + Adding low-rank branches to single_transformer_blocks.11.proj_out.linears.0 +25-08-31 11:16:20 | D | - Calibrating low-rank branch for single_transformer_blocks.11.proj_mlp +25-08-31 11:16:20 | D | + w: sfp4_e2m1_all +25-08-31 11:16:20 | D | + x: sfp4_e2m1_all +25-08-31 11:16:20 | D | + y: None +25-08-31 11:16:20 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:16:20 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:16:20 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:16:22 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:16:43 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:16:43 | D | - error = [ 5268.0875, 5225.7912, 5188.6121, 5162.8604, 5145.3830, 5130.3045, 5119.5267, 5109.8072, 5103.1277, 5096.0440] +25-08-31 11:16:43 | D | - best error = [ 5268.0875, 5225.7912, 5188.6121, 5162.8604, 5145.3830, 5130.3045, 5119.5267, 5109.8072, 5103.1277, 5096.0440] +25-08-31 11:17:05 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:17:05 | D | - error = [ 5090.6891, 5083.9992, 5079.3671, 5075.4480, 5071.2772, 5066.6573, 5063.9878, 5061.8084, 5059.0331, 5055.4034] +25-08-31 11:17:05 | D | - best error = [ 5090.6891, 5083.9992, 5079.3671, 5075.4480, 5071.2772, 5066.6573, 5063.9878, 5061.8084, 5059.0331, 5055.4034] +25-08-31 11:17:27 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 11:17:27 | D | - error = [ 5054.0661, 5051.3355, 5048.7499, 5047.6517, 5046.0527, 5045.1200, 5043.8971, 5042.8123, 5041.6866, 5039.4361] +25-08-31 11:17:27 | D | - best error = [ 5054.0661, 5051.3355, 5048.7499, 5047.6517, 5046.0527, 5045.1200, 5043.8971, 5042.8123, 5041.6866, 5039.4361] +25-08-31 11:17:48 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-31 11:17:48 | D | - error = [ 5038.3990, 5037.5796, 5036.3920, 5035.8878, 5034.6098, 5034.1619, 5032.1504, 5032.0594, 5031.7138, 5030.4095] +25-08-31 11:17:48 | D | - best error = [ 5038.3990, 5037.5796, 5036.3920, 5035.8878, 5034.6098, 5034.1619, 5032.1504, 5032.0594, 5031.7138, 5030.4095] +25-08-31 11:17:59 | D | - iter = [ 40, 41, 42, 43, 44] +25-08-31 11:17:59 | D | - error = [ 5029.5290, 5029.2496, 5028.5614, 5027.9994, 5028.6086] +25-08-31 11:17:59 | D | - best error = [ 5029.5290, 5029.2496, 5028.5614, 5027.9994, 5027.9994] +25-08-31 11:17:59 | D | + Adding low-rank branches to single_transformer_blocks.11.proj_mlp +25-08-31 11:17:59 | D | - Calibrating low-rank branch for single_transformer_blocks.11.proj_out.linears.1 +25-08-31 11:17:59 | D | + w: sfp4_e2m1_all +25-08-31 11:17:59 | D | + x: sfp4_e2m1_all +25-08-31 11:17:59 | D | + y: None +25-08-31 11:17:59 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:17:59 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:17:59 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:18:04 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:18:36 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:18:36 | D | - error = [ 4748.3747, 4721.2100, 4702.2122, 4690.0597, 4675.1930, 4664.1074, 4657.6020, 4650.5727, 4645.6345, 4640.3797] +25-08-31 11:18:36 | D | - best error = [ 4748.3747, 4721.2100, 4702.2122, 4690.0597, 4675.1930, 4664.1074, 4657.6020, 4650.5727, 4645.6345, 4640.3797] +25-08-31 11:19:08 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:19:08 | D | - error = [ 4637.6099, 4635.9323, 4633.3044, 4627.2242, 4620.6123, 4618.4792, 4614.8677, 4611.2358, 4608.7228, 4607.5571] +25-08-31 11:19:08 | D | - best error = [ 4637.6099, 4635.9323, 4633.3044, 4627.2242, 4620.6123, 4618.4792, 4614.8677, 4611.2358, 4608.7228, 4607.5571] +25-08-31 11:19:21 | D | - iter = [ 20, 21, 22, 23] +25-08-31 11:19:21 | D | - error = [ 4605.2299, 4602.3033, 4599.9239, 4600.1133] +25-08-31 11:19:21 | D | - best error = [ 4605.2299, 4602.3033, 4599.9239, 4599.9239] +25-08-31 11:19:22 | D | + Adding low-rank branches to single_transformer_blocks.11.proj_out.linears.1 +25-08-31 11:19:39 | D | - Calibrating low-rank branches of block single_transformer_blocks.12 +25-08-31 11:19:39 | D | - Calibrating low-rank branch for single_transformer_blocks.12.attn.to_q, single_transformer_blocks.12.attn.to_k, single_transformer_blocks.12.attn.to_v +25-08-31 11:19:39 | D | + w: sfp4_e2m1_all +25-08-31 11:19:39 | D | + x: sfp4_e2m1_all +25-08-31 11:19:39 | D | + y: None +25-08-31 11:19:39 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:19:39 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:19:39 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:19:41 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:20:00 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 11:20:00 | D | - error = [22019.5077, 21742.1957, 21338.2541, 21191.0457, 21061.3405, 20953.5181, 20895.5482, 20884.0402, 20990.2370] +25-08-31 11:20:00 | D | - best error = [22019.5077, 21742.1957, 21338.2541, 21191.0457, 21061.3405, 20953.5181, 20895.5482, 20884.0402, 20884.0402] +25-08-31 11:20:01 | D | + Adding low-rank branches to single_transformer_blocks.12.attn.to_q, single_transformer_blocks.12.attn.to_k, single_transformer_blocks.12.attn.to_v +25-08-31 11:20:01 | D | - Calibrating low-rank branch for single_transformer_blocks.12.proj_out.linears.0 +25-08-31 11:20:01 | D | + w: sfp4_e2m1_all +25-08-31 11:20:01 | D | + x: sfp4_e2m1_all +25-08-31 11:20:01 | D | + y: None +25-08-31 11:20:01 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:20:01 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:20:01 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:20:02 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:20:17 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:20:17 | D | - error = [ 3308.8933, 3280.1040, 3265.1052, 3247.1883, 3235.3831, 3226.7169, 3221.1697, 3209.3275, 3202.7357, 3213.0991] +25-08-31 11:20:17 | D | - best error = [ 3308.8933, 3280.1040, 3265.1052, 3247.1883, 3235.3831, 3226.7169, 3221.1697, 3209.3275, 3202.7357, 3202.7357] +25-08-31 11:20:17 | D | + Adding low-rank branches to single_transformer_blocks.12.proj_out.linears.0 +25-08-31 11:20:17 | D | - Calibrating low-rank branch for single_transformer_blocks.12.proj_mlp +25-08-31 11:20:17 | D | + w: sfp4_e2m1_all +25-08-31 11:20:17 | D | + x: sfp4_e2m1_all +25-08-31 11:20:17 | D | + y: None +25-08-31 11:20:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:20:17 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:20:17 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:20:19 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:20:35 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 11:20:35 | D | - error = [ 4514.0638, 4486.4630, 4473.4477, 4469.2362, 4453.3960, 4442.9765, 4433.4743, 4434.5261] +25-08-31 11:20:35 | D | - best error = [ 4514.0638, 4486.4630, 4473.4477, 4469.2362, 4453.3960, 4442.9765, 4433.4743, 4433.4743] +25-08-31 11:20:35 | D | + Adding low-rank branches to single_transformer_blocks.12.proj_mlp +25-08-31 11:20:36 | D | - Calibrating low-rank branch for single_transformer_blocks.12.proj_out.linears.1 +25-08-31 11:20:36 | D | + w: sfp4_e2m1_all +25-08-31 11:20:36 | D | + x: sfp4_e2m1_all +25-08-31 11:20:36 | D | + y: None +25-08-31 11:20:36 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:20:36 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:20:36 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:20:40 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:21:12 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:21:12 | D | - error = [ 5216.1049, 5187.7168, 5163.2279, 5146.5637, 5130.8401, 5123.2107, 5113.1384, 5107.3318, 5101.8736, 5097.1839] +25-08-31 11:21:12 | D | - best error = [ 5216.1049, 5187.7168, 5163.2279, 5146.5637, 5130.8401, 5123.2107, 5113.1384, 5107.3318, 5101.8736, 5097.1839] +25-08-31 11:21:45 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:21:45 | D | - error = [ 5092.8289, 5087.1208, 5083.9113, 5079.0139, 5076.2892, 5072.4923, 5068.8658, 5064.3381, 5061.0232, 5061.2257] +25-08-31 11:21:45 | D | - best error = [ 5092.8289, 5087.1208, 5083.9113, 5079.0139, 5076.2892, 5072.4923, 5068.8658, 5064.3381, 5061.0232, 5061.0232] +25-08-31 11:21:45 | D | + Adding low-rank branches to single_transformer_blocks.12.proj_out.linears.1 +25-08-31 11:22:03 | D | - Calibrating low-rank branches of block single_transformer_blocks.13 +25-08-31 11:22:03 | D | - Calibrating low-rank branch for single_transformer_blocks.13.attn.to_q, single_transformer_blocks.13.attn.to_k, single_transformer_blocks.13.attn.to_v +25-08-31 11:22:03 | D | + w: sfp4_e2m1_all +25-08-31 11:22:03 | D | + x: sfp4_e2m1_all +25-08-31 11:22:03 | D | + y: None +25-08-31 11:22:03 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:22:03 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:22:03 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:22:04 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:22:17 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-31 11:22:17 | D | - error = [25009.2322, 24594.7585, 24291.1408, 24076.3319, 24013.0644, 24037.5253] +25-08-31 11:22:17 | D | - best error = [25009.2322, 24594.7585, 24291.1408, 24076.3319, 24013.0644, 24013.0644] +25-08-31 11:22:17 | D | + Adding low-rank branches to single_transformer_blocks.13.attn.to_q, single_transformer_blocks.13.attn.to_k, single_transformer_blocks.13.attn.to_v +25-08-31 11:22:17 | D | - Calibrating low-rank branch for single_transformer_blocks.13.proj_out.linears.0 +25-08-31 11:22:17 | D | + w: sfp4_e2m1_all +25-08-31 11:22:17 | D | + x: sfp4_e2m1_all +25-08-31 11:22:17 | D | + y: None +25-08-31 11:22:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:22:17 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:22:17 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:22:18 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:22:32 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 11:22:32 | D | - error = [ 3507.8883, 3473.6612, 3455.3255, 3438.7460, 3438.2147, 3426.6929, 3413.2153, 3404.9790, 3414.8001] +25-08-31 11:22:32 | D | - best error = [ 3507.8883, 3473.6612, 3455.3255, 3438.7460, 3438.2147, 3426.6929, 3413.2153, 3404.9790, 3404.9790] +25-08-31 11:22:32 | D | + Adding low-rank branches to single_transformer_blocks.13.proj_out.linears.0 +25-08-31 11:22:32 | D | - Calibrating low-rank branch for single_transformer_blocks.13.proj_mlp +25-08-31 11:22:32 | D | + w: sfp4_e2m1_all +25-08-31 11:22:32 | D | + x: sfp4_e2m1_all +25-08-31 11:22:32 | D | + y: None +25-08-31 11:22:32 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:22:32 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:22:32 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:22:34 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:22:54 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:22:54 | D | - error = [ 4716.4482, 4684.9694, 4664.4140, 4649.5015, 4637.9118, 4627.8418, 4620.7934, 4615.3747, 4608.9200, 4603.3388] +25-08-31 11:22:54 | D | - best error = [ 4716.4482, 4684.9694, 4664.4140, 4649.5015, 4637.9118, 4627.8418, 4620.7934, 4615.3747, 4608.9200, 4603.3388] +25-08-31 11:23:15 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:23:15 | D | - error = [ 4599.7683, 4596.4504, 4592.4660, 4590.0769, 4587.0010, 4583.6787, 4580.8215, 4579.4672, 4577.8052, 4576.0822] +25-08-31 11:23:15 | D | - best error = [ 4599.7683, 4596.4504, 4592.4660, 4590.0769, 4587.0010, 4583.6787, 4580.8215, 4579.4672, 4577.8052, 4576.0822] +25-08-31 11:23:37 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 11:23:37 | D | - error = [ 4575.0255, 4573.4254, 4571.0802, 4570.3617, 4568.5751, 4567.5206, 4566.8554, 4566.7065, 4565.5801, 4564.4430] +25-08-31 11:23:37 | D | - best error = [ 4575.0255, 4573.4254, 4571.0802, 4570.3617, 4568.5751, 4567.5206, 4566.8554, 4566.7065, 4565.5801, 4564.4430] +25-08-31 11:23:46 | D | - iter = [ 30, 31, 32, 33] +25-08-31 11:23:46 | D | - error = [ 4564.3762, 4563.0135, 4562.4618, 4562.5390] +25-08-31 11:23:46 | D | - best error = [ 4564.3762, 4563.0135, 4562.4618, 4562.4618] +25-08-31 11:23:46 | D | + Adding low-rank branches to single_transformer_blocks.13.proj_mlp +25-08-31 11:23:46 | D | - Calibrating low-rank branch for single_transformer_blocks.13.proj_out.linears.1 +25-08-31 11:23:46 | D | + w: sfp4_e2m1_all +25-08-31 11:23:46 | D | + x: sfp4_e2m1_all +25-08-31 11:23:46 | D | + y: None +25-08-31 11:23:46 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:23:46 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:23:46 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:23:51 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:24:24 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:24:24 | D | - error = [ 6657.2153, 6609.5113, 6579.3696, 6557.0313, 6535.6160, 6524.7578, 6511.0285, 6502.6767, 6491.6432, 6480.6626] +25-08-31 11:24:24 | D | - best error = [ 6657.2153, 6609.5113, 6579.3696, 6557.0313, 6535.6160, 6524.7578, 6511.0285, 6502.6767, 6491.6432, 6480.6626] +25-08-31 11:24:50 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17] +25-08-31 11:24:50 | D | - error = [ 6475.9413, 6474.9554, 6468.2511, 6461.7787, 6459.5037, 6456.3423, 6448.9016, 6450.2673] +25-08-31 11:24:50 | D | - best error = [ 6475.9413, 6474.9554, 6468.2511, 6461.7787, 6459.5037, 6456.3423, 6448.9016, 6448.9016] +25-08-31 11:24:50 | D | + Adding low-rank branches to single_transformer_blocks.13.proj_out.linears.1 +25-08-31 11:25:08 | D | - Calibrating low-rank branches of block single_transformer_blocks.14 +25-08-31 11:25:08 | D | - Calibrating low-rank branch for single_transformer_blocks.14.attn.to_q, single_transformer_blocks.14.attn.to_k, single_transformer_blocks.14.attn.to_v +25-08-31 11:25:08 | D | + w: sfp4_e2m1_all +25-08-31 11:25:08 | D | + x: sfp4_e2m1_all +25-08-31 11:25:08 | D | + y: None +25-08-31 11:25:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:25:08 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:25:08 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:25:09 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:25:27 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 11:25:27 | D | - error = [23917.4126, 23291.4364, 23085.8641, 22917.6103, 22751.3980, 22665.2396, 22548.3268, 22612.1114] +25-08-31 11:25:27 | D | - best error = [23917.4126, 23291.4364, 23085.8641, 22917.6103, 22751.3980, 22665.2396, 22548.3268, 22548.3268] +25-08-31 11:25:27 | D | + Adding low-rank branches to single_transformer_blocks.14.attn.to_q, single_transformer_blocks.14.attn.to_k, single_transformer_blocks.14.attn.to_v +25-08-31 11:25:27 | D | - Calibrating low-rank branch for single_transformer_blocks.14.proj_out.linears.0 +25-08-31 11:25:27 | D | + w: sfp4_e2m1_all +25-08-31 11:25:27 | D | + x: sfp4_e2m1_all +25-08-31 11:25:27 | D | + y: None +25-08-31 11:25:27 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:25:27 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:25:27 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:25:28 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:25:43 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:25:43 | D | - error = [ 4631.6859, 4594.4501, 4572.4903, 4548.4738, 4530.8432, 4517.5077, 4510.0847, 4500.2558, 4494.5910, 4490.2733] +25-08-31 11:25:43 | D | - best error = [ 4631.6859, 4594.4501, 4572.4903, 4548.4738, 4530.8432, 4517.5077, 4510.0847, 4500.2558, 4494.5910, 4490.2733] +25-08-31 11:25:58 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:25:58 | D | - error = [ 4487.0995, 4478.8168, 4472.8579, 4464.9699, 4457.7566, 4456.7683, 4450.7267, 4448.8911, 4446.3272, 4443.7511] +25-08-31 11:25:58 | D | - best error = [ 4487.0995, 4478.8168, 4472.8579, 4464.9699, 4457.7566, 4456.7683, 4450.7267, 4448.8911, 4446.3272, 4443.7511] +25-08-31 11:25:59 | D | - iter = [ 20] +25-08-31 11:25:59 | D | - error = [ 4460.3468] +25-08-31 11:25:59 | D | - best error = [ 4443.7511] +25-08-31 11:25:59 | D | + Adding low-rank branches to single_transformer_blocks.14.proj_out.linears.0 +25-08-31 11:26:00 | D | - Calibrating low-rank branch for single_transformer_blocks.14.proj_mlp +25-08-31 11:26:00 | D | + w: sfp4_e2m1_all +25-08-31 11:26:00 | D | + x: sfp4_e2m1_all +25-08-31 11:26:00 | D | + y: None +25-08-31 11:26:00 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:26:00 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:26:00 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:26:01 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:26:22 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:26:22 | D | - error = [ 4854.9292, 4821.5527, 4799.3258, 4782.5341, 4770.4168, 4769.9824, 4759.9223, 4750.7498, 4744.8300, 4740.0150] +25-08-31 11:26:22 | D | - best error = [ 4854.9292, 4821.5527, 4799.3258, 4782.5341, 4770.4168, 4769.9824, 4759.9223, 4750.7498, 4744.8300, 4740.0150] +25-08-31 11:26:44 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:26:44 | D | - error = [ 4734.4808, 4730.3556, 4726.9187, 4724.0394, 4721.6150, 4718.9266, 4716.5833, 4714.5939, 4709.1545, 4706.7619] +25-08-31 11:26:44 | D | - best error = [ 4734.4808, 4730.3556, 4726.9187, 4724.0394, 4721.6150, 4718.9266, 4716.5833, 4714.5939, 4709.1545, 4706.7619] +25-08-31 11:27:06 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 11:27:06 | D | - error = [ 4704.6507, 4701.8390, 4700.0121, 4698.7837, 4696.5651, 4696.3132, 4694.8105, 4693.5460, 4693.1942, 4692.1012] +25-08-31 11:27:06 | D | - best error = [ 4704.6507, 4701.8390, 4700.0121, 4698.7837, 4696.5651, 4696.3132, 4694.8105, 4693.5460, 4693.1942, 4692.1012] +25-08-31 11:27:21 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36] +25-08-31 11:27:21 | D | - error = [ 4691.1651, 4690.2840, 4689.9096, 4689.5501, 4689.0389, 4688.0747, 4688.6075] +25-08-31 11:27:21 | D | - best error = [ 4691.1651, 4690.2840, 4689.9096, 4689.5501, 4689.0389, 4688.0747, 4688.0747] +25-08-31 11:27:21 | D | + Adding low-rank branches to single_transformer_blocks.14.proj_mlp +25-08-31 11:27:21 | D | - Calibrating low-rank branch for single_transformer_blocks.14.proj_out.linears.1 +25-08-31 11:27:21 | D | + w: sfp4_e2m1_all +25-08-31 11:27:21 | D | + x: sfp4_e2m1_all +25-08-31 11:27:21 | D | + y: None +25-08-31 11:27:21 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:27:21 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:27:21 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:27:26 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:27:58 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:27:58 | D | - error = [ 5935.9602, 5895.7554, 5868.3405, 5848.7212, 5832.3158, 5821.7537, 5811.1507, 5795.9941, 5789.1153, 5782.5890] +25-08-31 11:27:58 | D | - best error = [ 5935.9602, 5895.7554, 5868.3405, 5848.7212, 5832.3158, 5821.7537, 5811.1507, 5795.9941, 5789.1153, 5782.5890] +25-08-31 11:28:30 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:28:30 | D | - error = [ 5775.9541, 5768.2885, 5765.1075, 5762.6102, 5760.8248, 5755.9715, 5754.5266, 5750.8959, 5746.7768, 5745.6881] +25-08-31 11:28:30 | D | - best error = [ 5775.9541, 5768.2885, 5765.1075, 5762.6102, 5760.8248, 5755.9715, 5754.5266, 5750.8959, 5746.7768, 5745.6881] +25-08-31 11:28:50 | D | - iter = [ 20, 21, 22, 23, 24, 25] +25-08-31 11:28:50 | D | - error = [ 5742.9506, 5742.2213, 5740.9835, 5737.0567, 5735.3003, 5735.8267] +25-08-31 11:28:50 | D | - best error = [ 5742.9506, 5742.2213, 5740.9835, 5737.0567, 5735.3003, 5735.3003] +25-08-31 11:28:50 | D | + Adding low-rank branches to single_transformer_blocks.14.proj_out.linears.1 +25-08-31 11:29:08 | D | - Calibrating low-rank branches of block single_transformer_blocks.15 +25-08-31 11:29:08 | D | - Calibrating low-rank branch for single_transformer_blocks.15.attn.to_q, single_transformer_blocks.15.attn.to_k, single_transformer_blocks.15.attn.to_v +25-08-31 11:29:08 | D | + w: sfp4_e2m1_all +25-08-31 11:29:08 | D | + x: sfp4_e2m1_all +25-08-31 11:29:08 | D | + y: None +25-08-31 11:29:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:29:08 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:29:08 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:29:09 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:29:31 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:29:31 | D | - error = [23491.1142, 23064.6160, 22830.9586, 22641.5929, 22442.2266, 22293.9827, 22263.7930, 22163.4257, 22101.0494, 22039.5543] +25-08-31 11:29:31 | D | - best error = [23491.1142, 23064.6160, 22830.9586, 22641.5929, 22442.2266, 22293.9827, 22263.7930, 22163.4257, 22101.0494, 22039.5543] +25-08-31 11:29:33 | D | - iter = [ 10] +25-08-31 11:29:33 | D | - error = [22132.6712] +25-08-31 11:29:33 | D | - best error = [22039.5543] +25-08-31 11:29:33 | D | + Adding low-rank branches to single_transformer_blocks.15.attn.to_q, single_transformer_blocks.15.attn.to_k, single_transformer_blocks.15.attn.to_v +25-08-31 11:29:34 | D | - Calibrating low-rank branch for single_transformer_blocks.15.proj_out.linears.0 +25-08-31 11:29:34 | D | + w: sfp4_e2m1_all +25-08-31 11:29:34 | D | + x: sfp4_e2m1_all +25-08-31 11:29:34 | D | + y: None +25-08-31 11:29:34 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:29:34 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:29:34 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:29:35 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:29:50 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:29:50 | D | - error = [ 2610.2701, 2582.0714, 2567.5701, 2557.7723, 2553.2248, 2541.3942, 2536.5435, 2530.2269, 2529.2518, 2524.2241] +25-08-31 11:29:50 | D | - best error = [ 2610.2701, 2582.0714, 2567.5701, 2557.7723, 2553.2248, 2541.3942, 2536.5435, 2530.2269, 2529.2518, 2524.2241] +25-08-31 11:30:04 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:30:04 | D | - error = [ 2520.5234, 2516.3647, 2514.4548, 2512.5434, 2512.1046, 2510.6566, 2508.9955, 2507.5379, 2505.3949, 2503.8328] +25-08-31 11:30:04 | D | - best error = [ 2520.5234, 2516.3647, 2514.4548, 2512.5434, 2512.1046, 2510.6566, 2508.9955, 2507.5379, 2505.3949, 2503.8328] +25-08-31 11:30:06 | D | - iter = [ 20] +25-08-31 11:30:06 | D | - error = [ 2504.0844] +25-08-31 11:30:06 | D | - best error = [ 2503.8328] +25-08-31 11:30:06 | D | + Adding low-rank branches to single_transformer_blocks.15.proj_out.linears.0 +25-08-31 11:30:06 | D | - Calibrating low-rank branch for single_transformer_blocks.15.proj_mlp +25-08-31 11:30:06 | D | + w: sfp4_e2m1_all +25-08-31 11:30:06 | D | + x: sfp4_e2m1_all +25-08-31 11:30:06 | D | + y: None +25-08-31 11:30:06 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:30:06 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:30:06 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:30:08 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:30:30 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:30:30 | D | - error = [ 5551.4596, 5506.1789, 5477.5288, 5456.5923, 5439.4625, 5426.4316, 5415.8888, 5408.0814, 5401.9485, 5410.0871] +25-08-31 11:30:30 | D | - best error = [ 5551.4596, 5506.1789, 5477.5288, 5456.5923, 5439.4625, 5426.4316, 5415.8888, 5408.0814, 5401.9485, 5401.9485] +25-08-31 11:30:30 | D | + Adding low-rank branches to single_transformer_blocks.15.proj_mlp +25-08-31 11:30:30 | D | - Calibrating low-rank branch for single_transformer_blocks.15.proj_out.linears.1 +25-08-31 11:30:30 | D | + w: sfp4_e2m1_all +25-08-31 11:30:30 | D | + x: sfp4_e2m1_all +25-08-31 11:30:30 | D | + y: None +25-08-31 11:30:30 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:30:30 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:30:30 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:30:35 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:31:07 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:31:07 | D | - error = [ 6337.8621, 6292.7139, 6261.0101, 6240.9220, 6224.0174, 6201.6643, 6186.3282, 6176.8442, 6167.8920, 6159.4048] +25-08-31 11:31:07 | D | - best error = [ 6337.8621, 6292.7139, 6261.0101, 6240.9220, 6224.0174, 6201.6643, 6186.3282, 6176.8442, 6167.8920, 6159.4048] +25-08-31 11:31:30 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16] +25-08-31 11:31:30 | D | - error = [ 6151.0952, 6145.7963, 6142.4572, 6139.0856, 6137.2023, 6131.7100, 6132.7762] +25-08-31 11:31:30 | D | - best error = [ 6151.0952, 6145.7963, 6142.4572, 6139.0856, 6137.2023, 6131.7100, 6131.7100] +25-08-31 11:31:30 | D | + Adding low-rank branches to single_transformer_blocks.15.proj_out.linears.1 +25-08-31 11:31:48 | D | - Calibrating low-rank branches of block single_transformer_blocks.16 +25-08-31 11:31:48 | D | - Calibrating low-rank branch for single_transformer_blocks.16.attn.to_q, single_transformer_blocks.16.attn.to_k, single_transformer_blocks.16.attn.to_v +25-08-31 11:31:48 | D | + w: sfp4_e2m1_all +25-08-31 11:31:48 | D | + x: sfp4_e2m1_all +25-08-31 11:31:48 | D | + y: None +25-08-31 11:31:48 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:31:48 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:31:48 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:31:49 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:32:05 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-31 11:32:05 | D | - error = [22908.4816, 22519.4813, 22260.9175, 22193.7389, 22107.6342, 22070.2014, 22077.6673] +25-08-31 11:32:05 | D | - best error = [22908.4816, 22519.4813, 22260.9175, 22193.7389, 22107.6342, 22070.2014, 22070.2014] +25-08-31 11:32:05 | D | + Adding low-rank branches to single_transformer_blocks.16.attn.to_q, single_transformer_blocks.16.attn.to_k, single_transformer_blocks.16.attn.to_v +25-08-31 11:32:05 | D | - Calibrating low-rank branch for single_transformer_blocks.16.proj_out.linears.0 +25-08-31 11:32:05 | D | + w: sfp4_e2m1_all +25-08-31 11:32:05 | D | + x: sfp4_e2m1_all +25-08-31 11:32:05 | D | + y: None +25-08-31 11:32:05 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:32:05 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:32:05 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:32:06 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:32:21 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:32:21 | D | - error = [ 3646.3419, 3618.3730, 3597.1728, 3576.6385, 3564.7230, 3549.9597, 3545.1056, 3543.8444, 3531.7683, 3533.1172] +25-08-31 11:32:21 | D | - best error = [ 3646.3419, 3618.3730, 3597.1728, 3576.6385, 3564.7230, 3549.9597, 3545.1056, 3543.8444, 3531.7683, 3531.7683] +25-08-31 11:32:21 | D | + Adding low-rank branches to single_transformer_blocks.16.proj_out.linears.0 +25-08-31 11:32:22 | D | - Calibrating low-rank branch for single_transformer_blocks.16.proj_mlp +25-08-31 11:32:22 | D | + w: sfp4_e2m1_all +25-08-31 11:32:22 | D | + x: sfp4_e2m1_all +25-08-31 11:32:22 | D | + y: None +25-08-31 11:32:22 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:32:22 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:32:22 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:32:23 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:32:44 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:32:44 | D | - error = [ 3565.8064, 3548.4877, 3533.9196, 3525.5202, 3518.9834, 3514.0408, 3509.8685, 3506.4318, 3503.2783, 3500.4200] +25-08-31 11:32:44 | D | - best error = [ 3565.8064, 3548.4877, 3533.9196, 3525.5202, 3518.9834, 3514.0408, 3509.8685, 3506.4318, 3503.2783, 3500.4200] +25-08-31 11:33:05 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:33:05 | D | - error = [ 3498.1228, 3496.0107, 3493.6142, 3491.9964, 3490.9492, 3489.4011, 3488.3885, 3487.0086, 3485.4389, 3484.1699] +25-08-31 11:33:05 | D | - best error = [ 3498.1228, 3496.0107, 3493.6142, 3491.9964, 3490.9492, 3489.4011, 3488.3885, 3487.0086, 3485.4389, 3484.1699] +25-08-31 11:33:27 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 11:33:27 | D | - error = [ 3483.4047, 3482.5884, 3482.0040, 3481.0483, 3480.1866, 3479.9583, 3479.0203, 3478.3375, 3477.4555, 3476.8454] +25-08-31 11:33:27 | D | - best error = [ 3483.4047, 3482.5884, 3482.0040, 3481.0483, 3480.1866, 3479.9583, 3479.0203, 3478.3375, 3477.4555, 3476.8454] +25-08-31 11:33:43 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36] +25-08-31 11:33:43 | D | - error = [ 3476.6231, 3476.1210, 3475.0121, 3474.2759, 3474.0240, 3473.6944, 3473.8346] +25-08-31 11:33:43 | D | - best error = [ 3476.6231, 3476.1210, 3475.0121, 3474.2759, 3474.0240, 3473.6944, 3473.6944] +25-08-31 11:33:43 | D | + Adding low-rank branches to single_transformer_blocks.16.proj_mlp +25-08-31 11:33:43 | D | - Calibrating low-rank branch for single_transformer_blocks.16.proj_out.linears.1 +25-08-31 11:33:43 | D | + w: sfp4_e2m1_all +25-08-31 11:33:43 | D | + x: sfp4_e2m1_all +25-08-31 11:33:43 | D | + y: None +25-08-31 11:33:43 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:33:43 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:33:43 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:33:48 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:34:20 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:34:20 | D | - error = [ 6280.4841, 6234.4325, 6209.2715, 6181.5966, 6155.9529, 6140.7263, 6127.0463, 6116.7030, 6112.9898, 6107.2580] +25-08-31 11:34:20 | D | - best error = [ 6280.4841, 6234.4325, 6209.2715, 6181.5966, 6155.9529, 6140.7263, 6127.0463, 6116.7030, 6112.9898, 6107.2580] +25-08-31 11:34:52 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:34:52 | D | - error = [ 6098.1148, 6091.0383, 6085.1380, 6079.2322, 6074.0392, 6071.2773, 6066.1451, 6064.3341, 6062.4774, 6057.5259] +25-08-31 11:34:52 | D | - best error = [ 6098.1148, 6091.0383, 6085.1380, 6079.2322, 6074.0392, 6071.2773, 6066.1451, 6064.3341, 6062.4774, 6057.5259] +25-08-31 11:35:02 | D | - iter = [ 20, 21, 22] +25-08-31 11:35:02 | D | - error = [ 6054.7436, 6052.6662, 6054.1553] +25-08-31 11:35:02 | D | - best error = [ 6054.7436, 6052.6662, 6052.6662] +25-08-31 11:35:02 | D | + Adding low-rank branches to single_transformer_blocks.16.proj_out.linears.1 +25-08-31 11:35:20 | D | - Calibrating low-rank branches of block single_transformer_blocks.17 +25-08-31 11:35:20 | D | - Calibrating low-rank branch for single_transformer_blocks.17.attn.to_q, single_transformer_blocks.17.attn.to_k, single_transformer_blocks.17.attn.to_v +25-08-31 11:35:20 | D | + w: sfp4_e2m1_all +25-08-31 11:35:20 | D | + x: sfp4_e2m1_all +25-08-31 11:35:20 | D | + y: None +25-08-31 11:35:20 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:35:20 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:35:20 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:35:21 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:35:39 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 11:35:39 | D | - error = [26843.4418, 26339.9535, 26084.0193, 25933.5064, 25821.9107, 25697.4520, 25618.5008, 25841.6916] +25-08-31 11:35:39 | D | - best error = [26843.4418, 26339.9535, 26084.0193, 25933.5064, 25821.9107, 25697.4520, 25618.5008, 25618.5008] +25-08-31 11:35:39 | D | + Adding low-rank branches to single_transformer_blocks.17.attn.to_q, single_transformer_blocks.17.attn.to_k, single_transformer_blocks.17.attn.to_v +25-08-31 11:35:39 | D | - Calibrating low-rank branch for single_transformer_blocks.17.proj_out.linears.0 +25-08-31 11:35:39 | D | + w: sfp4_e2m1_all +25-08-31 11:35:39 | D | + x: sfp4_e2m1_all +25-08-31 11:35:39 | D | + y: None +25-08-31 11:35:39 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:35:39 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:35:39 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:35:40 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:35:55 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:35:55 | D | - error = [ 2361.0527, 2337.9241, 2321.6411, 2312.5458, 2307.0803, 2299.8418, 2293.0793, 2290.0455, 2287.3510, 2284.8212] +25-08-31 11:35:55 | D | - best error = [ 2361.0527, 2337.9241, 2321.6411, 2312.5458, 2307.0803, 2299.8418, 2293.0793, 2290.0455, 2287.3510, 2284.8212] +25-08-31 11:36:10 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:36:10 | D | - error = [ 2281.2744, 2278.3214, 2276.5108, 2275.2395, 2273.1742, 2271.1918, 2269.5907, 2267.7164, 2265.5325, 2263.7636] +25-08-31 11:36:10 | D | - best error = [ 2281.2744, 2278.3214, 2276.5108, 2275.2395, 2273.1742, 2271.1918, 2269.5907, 2267.7164, 2265.5325, 2263.7636] +25-08-31 11:36:17 | D | - iter = [ 20, 21, 22, 23] +25-08-31 11:36:17 | D | - error = [ 2262.0696, 2261.4666, 2260.0567, 2260.1448] +25-08-31 11:36:17 | D | - best error = [ 2262.0696, 2261.4666, 2260.0567, 2260.0567] +25-08-31 11:36:17 | D | + Adding low-rank branches to single_transformer_blocks.17.proj_out.linears.0 +25-08-31 11:36:17 | D | - Calibrating low-rank branch for single_transformer_blocks.17.proj_mlp +25-08-31 11:36:17 | D | + w: sfp4_e2m1_all +25-08-31 11:36:17 | D | + x: sfp4_e2m1_all +25-08-31 11:36:17 | D | + y: None +25-08-31 11:36:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:36:17 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:36:17 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:36:19 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:36:40 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:36:40 | D | - error = [ 4881.9611, 4848.6936, 4826.9850, 4815.2439, 4800.8033, 4789.8029, 4782.8147, 4775.7977, 4770.1373, 4766.1704] +25-08-31 11:36:40 | D | - best error = [ 4881.9611, 4848.6936, 4826.9850, 4815.2439, 4800.8033, 4789.8029, 4782.8147, 4775.7977, 4770.1373, 4766.1704] +25-08-31 11:37:02 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:37:02 | D | - error = [ 4762.3822, 4758.9490, 4756.3707, 4753.5355, 4751.1607, 4748.3163, 4746.1108, 4744.0027, 4742.9366, 4741.5352] +25-08-31 11:37:02 | D | - best error = [ 4762.3822, 4758.9490, 4756.3707, 4753.5355, 4751.1607, 4748.3163, 4746.1108, 4744.0027, 4742.9366, 4741.5352] +25-08-31 11:37:23 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 11:37:23 | D | - error = [ 4739.4384, 4738.3132, 4736.5141, 4735.6596, 4734.8020, 4733.4592, 4731.9934, 4730.8211, 4730.7137, 4729.3680] +25-08-31 11:37:23 | D | - best error = [ 4739.4384, 4738.3132, 4736.5141, 4735.6596, 4734.8020, 4733.4592, 4731.9934, 4730.8211, 4730.7137, 4729.3680] +25-08-31 11:37:34 | D | - iter = [ 30, 31, 32, 33, 34] +25-08-31 11:37:34 | D | - error = [ 4728.0503, 4727.6946, 4727.2973, 4726.2507, 4741.0126] +25-08-31 11:37:34 | D | - best error = [ 4728.0503, 4727.6946, 4727.2973, 4726.2507, 4726.2507] +25-08-31 11:37:35 | D | + Adding low-rank branches to single_transformer_blocks.17.proj_mlp +25-08-31 11:37:35 | D | - Calibrating low-rank branch for single_transformer_blocks.17.proj_out.linears.1 +25-08-31 11:37:35 | D | + w: sfp4_e2m1_all +25-08-31 11:37:35 | D | + x: sfp4_e2m1_all +25-08-31 11:37:35 | D | + y: None +25-08-31 11:37:35 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:37:35 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:37:35 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:37:39 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:38:11 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:38:11 | D | - error = [ 5945.9183, 5899.2935, 5868.7362, 5840.8695, 5827.2762, 5813.2652, 5800.8727, 5788.0603, 5778.0843, 5769.6728] +25-08-31 11:38:11 | D | - best error = [ 5945.9183, 5899.2935, 5868.7362, 5840.8695, 5827.2762, 5813.2652, 5800.8727, 5788.0603, 5778.0843, 5769.6728] +25-08-31 11:38:44 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:38:44 | D | - error = [ 5762.0568, 5757.1399, 5751.6952, 5748.0884, 5743.2473, 5738.7457, 5735.2104, 5730.5089, 5729.5476, 5727.0884] +25-08-31 11:38:44 | D | - best error = [ 5762.0568, 5757.1399, 5751.6952, 5748.0884, 5743.2473, 5738.7457, 5735.2104, 5730.5089, 5729.5476, 5727.0884] +25-08-31 11:38:47 | D | - iter = [ 20] +25-08-31 11:38:47 | D | - error = [ 5727.8703] +25-08-31 11:38:47 | D | - best error = [ 5727.0884] +25-08-31 11:38:47 | D | + Adding low-rank branches to single_transformer_blocks.17.proj_out.linears.1 +25-08-31 11:39:05 | D | - Calibrating low-rank branches of block single_transformer_blocks.18 +25-08-31 11:39:05 | D | - Calibrating low-rank branch for single_transformer_blocks.18.attn.to_q, single_transformer_blocks.18.attn.to_k, single_transformer_blocks.18.attn.to_v +25-08-31 11:39:05 | D | + w: sfp4_e2m1_all +25-08-31 11:39:05 | D | + x: sfp4_e2m1_all +25-08-31 11:39:05 | D | + y: None +25-08-31 11:39:05 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:39:05 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:39:05 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:39:06 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:39:22 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-31 11:39:22 | D | - error = [23026.3807, 22685.4314, 22402.2113, 22276.3480, 22194.4981, 22125.8683, 22136.0376] +25-08-31 11:39:22 | D | - best error = [23026.3807, 22685.4314, 22402.2113, 22276.3480, 22194.4981, 22125.8683, 22125.8683] +25-08-31 11:39:22 | D | + Adding low-rank branches to single_transformer_blocks.18.attn.to_q, single_transformer_blocks.18.attn.to_k, single_transformer_blocks.18.attn.to_v +25-08-31 11:39:22 | D | - Calibrating low-rank branch for single_transformer_blocks.18.proj_out.linears.0 +25-08-31 11:39:22 | D | + w: sfp4_e2m1_all +25-08-31 11:39:22 | D | + x: sfp4_e2m1_all +25-08-31 11:39:22 | D | + y: None +25-08-31 11:39:22 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:39:22 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:39:22 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:39:23 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:39:38 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:39:38 | D | - error = [ 2653.6105, 2623.6529, 2607.9485, 2590.5664, 2578.0308, 2571.9015, 2565.4493, 2559.5050, 2554.7966, 2550.4026] +25-08-31 11:39:38 | D | - best error = [ 2653.6105, 2623.6529, 2607.9485, 2590.5664, 2578.0308, 2571.9015, 2565.4493, 2559.5050, 2554.7966, 2550.4026] +25-08-31 11:39:44 | D | - iter = [ 10, 11, 12, 13] +25-08-31 11:39:44 | D | - error = [ 2549.5442, 2544.9576, 2538.7146, 2539.3959] +25-08-31 11:39:44 | D | - best error = [ 2549.5442, 2544.9576, 2538.7146, 2538.7146] +25-08-31 11:39:44 | D | + Adding low-rank branches to single_transformer_blocks.18.proj_out.linears.0 +25-08-31 11:39:45 | D | - Calibrating low-rank branch for single_transformer_blocks.18.proj_mlp +25-08-31 11:39:45 | D | + w: sfp4_e2m1_all +25-08-31 11:39:45 | D | + x: sfp4_e2m1_all +25-08-31 11:39:45 | D | + y: None +25-08-31 11:39:45 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:39:45 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:39:45 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:39:46 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:40:08 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:40:08 | D | - error = [ 4617.2622, 4590.9759, 4573.7735, 4561.0104, 4551.8350, 4543.6056, 4537.5839, 4532.3910, 4527.7811, 4524.0729] +25-08-31 11:40:08 | D | - best error = [ 4617.2622, 4590.9759, 4573.7735, 4561.0104, 4551.8350, 4543.6056, 4537.5839, 4532.3910, 4527.7811, 4524.0729] +25-08-31 11:40:29 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:40:29 | D | - error = [ 4520.1343, 4517.1367, 4514.7983, 4512.1334, 4509.8714, 4508.4975, 4505.9727, 4504.4538, 4502.0040, 4501.2638] +25-08-31 11:40:29 | D | - best error = [ 4520.1343, 4517.1367, 4514.7983, 4512.1334, 4509.8714, 4508.4975, 4505.9727, 4504.4538, 4502.0040, 4501.2638] +25-08-31 11:40:51 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 11:40:51 | D | - error = [ 4499.2228, 4497.6736, 4496.8573, 4495.9998, 4495.0838, 4493.8077, 4492.3718, 4491.7498, 4490.8788, 4490.2982] +25-08-31 11:40:51 | D | - best error = [ 4499.2228, 4497.6736, 4496.8573, 4495.9998, 4495.0838, 4493.8077, 4492.3718, 4491.7498, 4490.8788, 4490.2982] +25-08-31 11:41:07 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36] +25-08-31 11:41:07 | D | - error = [ 4489.7189, 4488.5800, 4487.6609, 4486.7757, 4485.9307, 4485.0365, 4485.1740] +25-08-31 11:41:07 | D | - best error = [ 4489.7189, 4488.5800, 4487.6609, 4486.7757, 4485.9307, 4485.0365, 4485.0365] +25-08-31 11:41:07 | D | + Adding low-rank branches to single_transformer_blocks.18.proj_mlp +25-08-31 11:41:07 | D | - Calibrating low-rank branch for single_transformer_blocks.18.proj_out.linears.1 +25-08-31 11:41:07 | D | + w: sfp4_e2m1_all +25-08-31 11:41:07 | D | + x: sfp4_e2m1_all +25-08-31 11:41:07 | D | + y: None +25-08-31 11:41:07 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:41:07 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:41:07 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:41:12 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:41:44 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:41:44 | D | - error = [ 5966.7884, 5923.1576, 5889.0403, 5866.0644, 5847.6070, 5833.9033, 5820.4120, 5811.7701, 5801.5622, 5792.6944] +25-08-31 11:41:44 | D | - best error = [ 5966.7884, 5923.1576, 5889.0403, 5866.0644, 5847.6070, 5833.9033, 5820.4120, 5811.7701, 5801.5622, 5792.6944] +25-08-31 11:42:16 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:42:16 | D | - error = [ 5785.4784, 5778.4830, 5771.7013, 5769.4381, 5766.0732, 5763.5708, 5759.0977, 5752.8362, 5750.3033, 5748.3135] +25-08-31 11:42:16 | D | - best error = [ 5785.4784, 5778.4830, 5771.7013, 5769.4381, 5766.0732, 5763.5708, 5759.0977, 5752.8362, 5750.3033, 5748.3135] +25-08-31 11:42:32 | D | - iter = [ 20, 21, 22, 23, 24] +25-08-31 11:42:32 | D | - error = [ 5745.1743, 5741.9944, 5741.0978, 5738.3559, 5739.6361] +25-08-31 11:42:32 | D | - best error = [ 5745.1743, 5741.9944, 5741.0978, 5738.3559, 5738.3559] +25-08-31 11:42:32 | D | + Adding low-rank branches to single_transformer_blocks.18.proj_out.linears.1 +25-08-31 11:42:50 | D | - Calibrating low-rank branches of block single_transformer_blocks.19 +25-08-31 11:42:50 | D | - Calibrating low-rank branch for single_transformer_blocks.19.attn.to_q, single_transformer_blocks.19.attn.to_k, single_transformer_blocks.19.attn.to_v +25-08-31 11:42:50 | D | + w: sfp4_e2m1_all +25-08-31 11:42:50 | D | + x: sfp4_e2m1_all +25-08-31 11:42:50 | D | + y: None +25-08-31 11:42:50 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:42:50 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:42:50 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:42:52 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:43:00 | D | - iter = [ 0, 1, 2, 3] +25-08-31 11:43:00 | D | - error = [22501.9061, 22032.0370, 21763.7526, 21832.8787] +25-08-31 11:43:00 | D | - best error = [22501.9061, 22032.0370, 21763.7526, 21763.7526] +25-08-31 11:43:01 | D | + Adding low-rank branches to single_transformer_blocks.19.attn.to_q, single_transformer_blocks.19.attn.to_k, single_transformer_blocks.19.attn.to_v +25-08-31 11:43:01 | D | - Calibrating low-rank branch for single_transformer_blocks.19.proj_out.linears.0 +25-08-31 11:43:01 | D | + w: sfp4_e2m1_all +25-08-31 11:43:01 | D | + x: sfp4_e2m1_all +25-08-31 11:43:01 | D | + y: None +25-08-31 11:43:01 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:43:01 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:43:01 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:43:02 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:43:17 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:43:17 | D | - error = [ 1847.2243, 1832.9750, 1823.8026, 1817.5622, 1809.2584, 1803.5030, 1799.3264, 1795.9523, 1792.6277, 1793.1087] +25-08-31 11:43:17 | D | - best error = [ 1847.2243, 1832.9750, 1823.8026, 1817.5622, 1809.2584, 1803.5030, 1799.3264, 1795.9523, 1792.6277, 1792.6277] +25-08-31 11:43:17 | D | + Adding low-rank branches to single_transformer_blocks.19.proj_out.linears.0 +25-08-31 11:43:17 | D | - Calibrating low-rank branch for single_transformer_blocks.19.proj_mlp +25-08-31 11:43:17 | D | + w: sfp4_e2m1_all +25-08-31 11:43:17 | D | + x: sfp4_e2m1_all +25-08-31 11:43:17 | D | + y: None +25-08-31 11:43:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:43:17 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:43:17 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:43:18 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:43:35 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 11:43:35 | D | - error = [ 4363.5062, 4341.0483, 4325.8548, 4313.4617, 4304.6145, 4297.3929, 4291.1956, 4300.2493] +25-08-31 11:43:35 | D | - best error = [ 4363.5062, 4341.0483, 4325.8548, 4313.4617, 4304.6145, 4297.3929, 4291.1956, 4291.1956] +25-08-31 11:43:36 | D | + Adding low-rank branches to single_transformer_blocks.19.proj_mlp +25-08-31 11:43:36 | D | - Calibrating low-rank branch for single_transformer_blocks.19.proj_out.linears.1 +25-08-31 11:43:36 | D | + w: sfp4_e2m1_all +25-08-31 11:43:36 | D | + x: sfp4_e2m1_all +25-08-31 11:43:36 | D | + y: None +25-08-31 11:43:36 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:43:36 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:43:36 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:43:41 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:44:12 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:44:12 | D | - error = [ 5630.1098, 5582.6300, 5553.0529, 5531.6018, 5516.6571, 5501.4498, 5491.4410, 5480.4998, 5473.1706, 5466.1048] +25-08-31 11:44:12 | D | - best error = [ 5630.1098, 5582.6300, 5553.0529, 5531.6018, 5516.6571, 5501.4498, 5491.4410, 5480.4998, 5473.1706, 5466.1048] +25-08-31 11:44:42 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18] +25-08-31 11:44:42 | D | - error = [ 5459.6648, 5453.3375, 5449.0045, 5446.1866, 5442.7521, 5438.7182, 5433.8456, 5427.4425, 5428.4617] +25-08-31 11:44:42 | D | - best error = [ 5459.6648, 5453.3375, 5449.0045, 5446.1866, 5442.7521, 5438.7182, 5433.8456, 5427.4425, 5427.4425] +25-08-31 11:44:42 | D | + Adding low-rank branches to single_transformer_blocks.19.proj_out.linears.1 +25-08-31 11:45:00 | D | - Calibrating low-rank branches of block single_transformer_blocks.20 +25-08-31 11:45:00 | D | - Calibrating low-rank branch for single_transformer_blocks.20.attn.to_q, single_transformer_blocks.20.attn.to_k, single_transformer_blocks.20.attn.to_v +25-08-31 11:45:00 | D | + w: sfp4_e2m1_all +25-08-31 11:45:00 | D | + x: sfp4_e2m1_all +25-08-31 11:45:00 | D | + y: None +25-08-31 11:45:00 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:45:00 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:45:00 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:45:01 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:45:21 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 11:45:21 | D | - error = [23601.0454, 23156.1793, 23002.1105, 22976.3468, 22855.7711, 22813.7313, 22793.1302, 22748.3781, 22849.7681] +25-08-31 11:45:21 | D | - best error = [23601.0454, 23156.1793, 23002.1105, 22976.3468, 22855.7711, 22813.7313, 22793.1302, 22748.3781, 22748.3781] +25-08-31 11:45:21 | D | + Adding low-rank branches to single_transformer_blocks.20.attn.to_q, single_transformer_blocks.20.attn.to_k, single_transformer_blocks.20.attn.to_v +25-08-31 11:45:21 | D | - Calibrating low-rank branch for single_transformer_blocks.20.proj_out.linears.0 +25-08-31 11:45:21 | D | + w: sfp4_e2m1_all +25-08-31 11:45:21 | D | + x: sfp4_e2m1_all +25-08-31 11:45:21 | D | + y: None +25-08-31 11:45:21 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:45:21 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:45:21 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:45:23 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:45:37 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:45:37 | D | - error = [ 2842.8756, 2811.8831, 2796.4150, 2777.5065, 2773.4938, 2758.6754, 2751.2641, 2747.5222, 2744.7615, 2742.7535] +25-08-31 11:45:37 | D | - best error = [ 2842.8756, 2811.8831, 2796.4150, 2777.5065, 2773.4938, 2758.6754, 2751.2641, 2747.5222, 2744.7615, 2742.7535] +25-08-31 11:45:46 | D | - iter = [ 10, 11, 12, 13, 14, 15] +25-08-31 11:45:46 | D | - error = [ 2736.8328, 2733.9818, 2732.0362, 2727.2875, 2726.6298, 2727.1807] +25-08-31 11:45:46 | D | - best error = [ 2736.8328, 2733.9818, 2732.0362, 2727.2875, 2726.6298, 2726.6298] +25-08-31 11:45:46 | D | + Adding low-rank branches to single_transformer_blocks.20.proj_out.linears.0 +25-08-31 11:45:46 | D | - Calibrating low-rank branch for single_transformer_blocks.20.proj_mlp +25-08-31 11:45:46 | D | + w: sfp4_e2m1_all +25-08-31 11:45:46 | D | + x: sfp4_e2m1_all +25-08-31 11:45:46 | D | + y: None +25-08-31 11:45:46 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:45:46 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:45:46 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:45:48 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:46:10 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:46:10 | D | - error = [ 5015.1726, 4980.3924, 4964.4327, 4943.9072, 4930.2142, 4920.0159, 4911.9173, 4905.1502, 4899.6735, 4893.6310] +25-08-31 11:46:10 | D | - best error = [ 5015.1726, 4980.3924, 4964.4327, 4943.9072, 4930.2142, 4920.0159, 4911.9173, 4905.1502, 4899.6735, 4893.6310] +25-08-31 11:46:31 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:46:31 | D | - error = [ 4889.0253, 4885.4822, 4881.9541, 4878.7650, 4875.9785, 4873.5203, 4871.6201, 4869.6951, 4868.0292, 4866.0413] +25-08-31 11:46:31 | D | - best error = [ 4889.0253, 4885.4822, 4881.9541, 4878.7650, 4875.9785, 4873.5203, 4871.6201, 4869.6951, 4868.0292, 4866.0413] +25-08-31 11:46:53 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 11:46:53 | D | - error = [ 4864.5261, 4862.4751, 4860.5594, 4858.9950, 4858.5331, 4856.9295, 4855.3515, 4854.0087, 4853.6934, 4852.4897] +25-08-31 11:46:53 | D | - best error = [ 4864.5261, 4862.4751, 4860.5594, 4858.9950, 4858.5331, 4856.9295, 4855.3515, 4854.0087, 4853.6934, 4852.4897] +25-08-31 11:47:14 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-31 11:47:14 | D | - error = [ 4851.2971, 4850.8653, 4850.0103, 4849.8344, 4849.2804, 4848.7598, 4847.3902, 4846.2418, 4845.9979, 4845.4550] +25-08-31 11:47:14 | D | - best error = [ 4851.2971, 4850.8653, 4850.0103, 4849.8344, 4849.2804, 4848.7598, 4847.3902, 4846.2418, 4845.9979, 4845.4550] +25-08-31 11:47:16 | D | - iter = [ 40] +25-08-31 11:47:16 | D | - error = [ 4845.5373] +25-08-31 11:47:16 | D | - best error = [ 4845.4550] +25-08-31 11:47:17 | D | + Adding low-rank branches to single_transformer_blocks.20.proj_mlp +25-08-31 11:47:17 | D | - Calibrating low-rank branch for single_transformer_blocks.20.proj_out.linears.1 +25-08-31 11:47:17 | D | + w: sfp4_e2m1_all +25-08-31 11:47:17 | D | + x: sfp4_e2m1_all +25-08-31 11:47:17 | D | + y: None +25-08-31 11:47:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:47:17 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:47:17 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:47:22 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:47:53 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:47:53 | D | - error = [ 5742.8164, 5707.0725, 5674.9153, 5654.2119, 5636.7269, 5623.8339, 5610.6679, 5601.4921, 5593.7440, 5590.6186] +25-08-31 11:47:53 | D | - best error = [ 5742.8164, 5707.0725, 5674.9153, 5654.2119, 5636.7269, 5623.8339, 5610.6679, 5601.4921, 5593.7440, 5590.6186] +25-08-31 11:48:26 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:48:26 | D | - error = [ 5586.0772, 5576.6424, 5575.7150, 5572.1181, 5568.0638, 5561.9775, 5558.9031, 5558.3739, 5554.2926, 5552.2415] +25-08-31 11:48:26 | D | - best error = [ 5586.0772, 5576.6424, 5575.7150, 5572.1181, 5568.0638, 5561.9775, 5558.9031, 5558.3739, 5554.2926, 5552.2415] +25-08-31 11:48:48 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26] +25-08-31 11:48:48 | D | - error = [ 5551.9906, 5548.5209, 5547.9200, 5546.5097, 5543.6291, 5540.7568, 5541.5835] +25-08-31 11:48:48 | D | - best error = [ 5551.9906, 5548.5209, 5547.9200, 5546.5097, 5543.6291, 5540.7568, 5540.7568] +25-08-31 11:48:49 | D | + Adding low-rank branches to single_transformer_blocks.20.proj_out.linears.1 +25-08-31 11:49:06 | D | - Calibrating low-rank branches of block single_transformer_blocks.21 +25-08-31 11:49:06 | D | - Calibrating low-rank branch for single_transformer_blocks.21.attn.to_q, single_transformer_blocks.21.attn.to_k, single_transformer_blocks.21.attn.to_v +25-08-31 11:49:06 | D | + w: sfp4_e2m1_all +25-08-31 11:49:06 | D | + x: sfp4_e2m1_all +25-08-31 11:49:06 | D | + y: None +25-08-31 11:49:06 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:49:06 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:49:06 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:49:08 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:49:28 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 11:49:28 | D | - error = [23268.3834, 22987.8472, 22793.2530, 22588.9523, 22402.5856, 22278.5177, 22224.8927, 22078.8348, 22103.3449] +25-08-31 11:49:28 | D | - best error = [23268.3834, 22987.8472, 22793.2530, 22588.9523, 22402.5856, 22278.5177, 22224.8927, 22078.8348, 22078.8348] +25-08-31 11:49:28 | D | + Adding low-rank branches to single_transformer_blocks.21.attn.to_q, single_transformer_blocks.21.attn.to_k, single_transformer_blocks.21.attn.to_v +25-08-31 11:49:28 | D | - Calibrating low-rank branch for single_transformer_blocks.21.proj_out.linears.0 +25-08-31 11:49:28 | D | + w: sfp4_e2m1_all +25-08-31 11:49:28 | D | + x: sfp4_e2m1_all +25-08-31 11:49:28 | D | + y: None +25-08-31 11:49:29 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:49:29 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:49:29 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:49:30 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:49:45 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:49:45 | D | - error = [ 2025.4930, 2012.3658, 2009.7856, 1997.6129, 1991.6991, 1979.7206, 1977.5069, 1973.3193, 1968.9241, 1965.2125] +25-08-31 11:49:45 | D | - best error = [ 2025.4930, 2012.3658, 2009.7856, 1997.6129, 1991.6991, 1979.7206, 1977.5069, 1973.3193, 1968.9241, 1965.2125] +25-08-31 11:49:58 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17] +25-08-31 11:49:58 | D | - error = [ 1962.9325, 1960.7532, 1958.3083, 1956.2494, 1955.5166, 1953.0669, 1952.1332, 1954.0143] +25-08-31 11:49:58 | D | - best error = [ 1962.9325, 1960.7532, 1958.3083, 1956.2494, 1955.5166, 1953.0669, 1952.1332, 1952.1332] +25-08-31 11:49:58 | D | + Adding low-rank branches to single_transformer_blocks.21.proj_out.linears.0 +25-08-31 11:49:58 | D | - Calibrating low-rank branch for single_transformer_blocks.21.proj_mlp +25-08-31 11:49:58 | D | + w: sfp4_e2m1_all +25-08-31 11:49:58 | D | + x: sfp4_e2m1_all +25-08-31 11:49:58 | D | + y: None +25-08-31 11:49:58 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:49:58 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:49:58 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:50:00 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:50:13 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-31 11:50:13 | D | - error = [ 4548.2471, 4523.3050, 4506.3282, 4494.0472, 4482.9974, 4486.0134] +25-08-31 11:50:13 | D | - best error = [ 4548.2471, 4523.3050, 4506.3282, 4494.0472, 4482.9974, 4482.9974] +25-08-31 11:50:13 | D | + Adding low-rank branches to single_transformer_blocks.21.proj_mlp +25-08-31 11:50:13 | D | - Calibrating low-rank branch for single_transformer_blocks.21.proj_out.linears.1 +25-08-31 11:50:13 | D | + w: sfp4_e2m1_all +25-08-31 11:50:13 | D | + x: sfp4_e2m1_all +25-08-31 11:50:13 | D | + y: None +25-08-31 11:50:13 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:50:13 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:50:13 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:50:18 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:50:49 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:50:49 | D | - error = [ 5451.8529, 5410.0699, 5379.1332, 5354.5159, 5340.6389, 5325.7987, 5317.6777, 5308.7373, 5297.2023, 5289.6489] +25-08-31 11:50:49 | D | - best error = [ 5451.8529, 5410.0699, 5379.1332, 5354.5159, 5340.6389, 5325.7987, 5317.6777, 5308.7373, 5297.2023, 5289.6489] +25-08-31 11:51:22 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:51:22 | D | - error = [ 5282.6580, 5277.3124, 5272.9832, 5268.5860, 5263.9968, 5262.1990, 5259.1222, 5254.4052, 5252.2310, 5249.3419] +25-08-31 11:51:22 | D | - best error = [ 5282.6580, 5277.3124, 5272.9832, 5268.5860, 5263.9968, 5262.1990, 5259.1222, 5254.4052, 5252.2310, 5249.3419] +25-08-31 11:51:50 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28] +25-08-31 11:51:50 | D | - error = [ 5246.8829, 5246.4417, 5244.6988, 5241.2563, 5239.0692, 5238.9978, 5238.8619, 5236.8330, 5236.9697] +25-08-31 11:51:50 | D | - best error = [ 5246.8829, 5246.4417, 5244.6988, 5241.2563, 5239.0692, 5238.9978, 5238.8619, 5236.8330, 5236.8330] +25-08-31 11:51:51 | D | + Adding low-rank branches to single_transformer_blocks.21.proj_out.linears.1 +25-08-31 11:52:08 | D | - Calibrating low-rank branches of block single_transformer_blocks.22 +25-08-31 11:52:08 | D | - Calibrating low-rank branch for single_transformer_blocks.22.attn.to_q, single_transformer_blocks.22.attn.to_k, single_transformer_blocks.22.attn.to_v +25-08-31 11:52:08 | D | + w: sfp4_e2m1_all +25-08-31 11:52:08 | D | + x: sfp4_e2m1_all +25-08-31 11:52:08 | D | + y: None +25-08-31 11:52:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:52:08 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:52:08 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:52:10 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:52:23 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-31 11:52:23 | D | - error = [28453.9206, 28122.6696, 27869.5361, 27699.5584, 27579.0111, 27596.5575] +25-08-31 11:52:23 | D | - best error = [28453.9206, 28122.6696, 27869.5361, 27699.5584, 27579.0111, 27579.0111] +25-08-31 11:52:23 | D | + Adding low-rank branches to single_transformer_blocks.22.attn.to_q, single_transformer_blocks.22.attn.to_k, single_transformer_blocks.22.attn.to_v +25-08-31 11:52:23 | D | - Calibrating low-rank branch for single_transformer_blocks.22.proj_out.linears.0 +25-08-31 11:52:23 | D | + w: sfp4_e2m1_all +25-08-31 11:52:23 | D | + x: sfp4_e2m1_all +25-08-31 11:52:23 | D | + y: None +25-08-31 11:52:23 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:52:23 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:52:23 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:52:24 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:52:39 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:52:39 | D | - error = [ 1594.0192, 1578.1600, 1567.7859, 1561.7310, 1557.1371, 1551.8135, 1546.2324, 1543.6626, 1540.5864, 1539.5345] +25-08-31 11:52:39 | D | - best error = [ 1594.0192, 1578.1600, 1567.7859, 1561.7310, 1557.1371, 1551.8135, 1546.2324, 1543.6626, 1540.5864, 1539.5345] +25-08-31 11:52:45 | D | - iter = [ 10, 11, 12, 13] +25-08-31 11:52:45 | D | - error = [ 1538.0545, 1536.7223, 1535.0792, 1540.4057] +25-08-31 11:52:45 | D | - best error = [ 1538.0545, 1536.7223, 1535.0792, 1535.0792] +25-08-31 11:52:45 | D | + Adding low-rank branches to single_transformer_blocks.22.proj_out.linears.0 +25-08-31 11:52:45 | D | - Calibrating low-rank branch for single_transformer_blocks.22.proj_mlp +25-08-31 11:52:45 | D | + w: sfp4_e2m1_all +25-08-31 11:52:45 | D | + x: sfp4_e2m1_all +25-08-31 11:52:45 | D | + y: None +25-08-31 11:52:45 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:52:45 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:52:45 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:52:47 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:53:08 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:53:08 | D | - error = [ 4513.9681, 4485.8863, 4467.2584, 4453.3277, 4442.5785, 4434.4080, 4427.4191, 4421.1021, 4415.9780, 4412.2883] +25-08-31 11:53:08 | D | - best error = [ 4513.9681, 4485.8863, 4467.2584, 4453.3277, 4442.5785, 4434.4080, 4427.4191, 4421.1021, 4415.9780, 4412.2883] +25-08-31 11:53:30 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:53:30 | D | - error = [ 4408.0225, 4404.7275, 4402.1290, 4399.4911, 4397.6082, 4395.7846, 4393.5881, 4392.1316, 4390.0610, 4387.8530] +25-08-31 11:53:30 | D | - best error = [ 4408.0225, 4404.7275, 4402.1290, 4399.4911, 4397.6082, 4395.7846, 4393.5881, 4392.1316, 4390.0610, 4387.8530] +25-08-31 11:53:51 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 11:53:51 | D | - error = [ 4385.8846, 4384.4820, 4382.3377, 4381.5381, 4380.3087, 4379.1483, 4377.7602, 4376.8585, 4375.9596, 4375.2902] +25-08-31 11:53:51 | D | - best error = [ 4385.8846, 4384.4820, 4382.3377, 4381.5381, 4380.3087, 4379.1483, 4377.7602, 4376.8585, 4375.9596, 4375.2902] +25-08-31 11:54:13 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-31 11:54:13 | D | - error = [ 4374.3557, 4374.0254, 4373.4275, 4372.8359, 4371.9456, 4370.8752, 4369.8794, 4368.9857, 4368.0746, 4367.7097] +25-08-31 11:54:13 | D | - best error = [ 4374.3557, 4374.0254, 4373.4275, 4372.8359, 4371.9456, 4370.8752, 4369.8794, 4368.9857, 4368.0746, 4367.7097] +25-08-31 11:54:34 | D | - iter = [ 40, 41, 42, 43, 44, 45, 46, 47, 48, 49] +25-08-31 11:54:34 | D | - error = [ 4367.5751, 4367.2985, 4366.9495, 4366.6110, 4366.3294, 4365.3939, 4365.3247, 4364.9131, 4364.6443, 4364.5859] +25-08-31 11:54:34 | D | - best error = [ 4367.5751, 4367.2985, 4366.9495, 4366.6110, 4366.3294, 4365.3939, 4365.3247, 4364.9131, 4364.6443, 4364.5859] +25-08-31 11:54:56 | D | - iter = [ 50, 51, 52, 53, 54, 55, 56, 57, 58, 59] +25-08-31 11:54:56 | D | - error = [ 4364.3270, 4364.0619, 4363.8928, 4363.2609, 4362.7109, 4362.2751, 4362.0828, 4361.7221, 4361.3995, 4361.4089] +25-08-31 11:54:56 | D | - best error = [ 4364.3270, 4364.0619, 4363.8928, 4363.2609, 4362.7109, 4362.2751, 4362.0828, 4361.7221, 4361.3995, 4361.3995] +25-08-31 11:54:56 | D | + Adding low-rank branches to single_transformer_blocks.22.proj_mlp +25-08-31 11:54:56 | D | - Calibrating low-rank branch for single_transformer_blocks.22.proj_out.linears.1 +25-08-31 11:54:56 | D | + w: sfp4_e2m1_all +25-08-31 11:54:56 | D | + x: sfp4_e2m1_all +25-08-31 11:54:56 | D | + y: None +25-08-31 11:54:56 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:54:56 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:54:56 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:55:01 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:55:33 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:55:33 | D | - error = [ 5450.5904, 5408.2080, 5379.3068, 5355.0289, 5337.3424, 5318.6444, 5310.2354, 5294.1020, 5283.9447, 5279.3926] +25-08-31 11:55:33 | D | - best error = [ 5450.5904, 5408.2080, 5379.3068, 5355.0289, 5337.3424, 5318.6444, 5310.2354, 5294.1020, 5283.9447, 5279.3926] +25-08-31 11:56:04 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:56:04 | D | - error = [ 5272.0307, 5267.5343, 5263.2657, 5260.3827, 5255.6952, 5251.7070, 5249.7015, 5248.0934, 5243.9041, 5240.7364] +25-08-31 11:56:04 | D | - best error = [ 5272.0307, 5267.5343, 5263.2657, 5260.3827, 5255.6952, 5251.7070, 5249.7015, 5248.0934, 5243.9041, 5240.7364] +25-08-31 11:56:29 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27] +25-08-31 11:56:29 | D | - error = [ 5240.0031, 5237.3377, 5234.4818, 5232.5650, 5231.5220, 5226.3976, 5225.1771, 5228.5113] +25-08-31 11:56:29 | D | - best error = [ 5240.0031, 5237.3377, 5234.4818, 5232.5650, 5231.5220, 5226.3976, 5225.1771, 5225.1771] +25-08-31 11:56:29 | D | + Adding low-rank branches to single_transformer_blocks.22.proj_out.linears.1 +25-08-31 11:56:47 | D | - Calibrating low-rank branches of block single_transformer_blocks.23 +25-08-31 11:56:47 | D | - Calibrating low-rank branch for single_transformer_blocks.23.attn.to_q, single_transformer_blocks.23.attn.to_k, single_transformer_blocks.23.attn.to_v +25-08-31 11:56:47 | D | + w: sfp4_e2m1_all +25-08-31 11:56:47 | D | + x: sfp4_e2m1_all +25-08-31 11:56:47 | D | + y: None +25-08-31 11:56:47 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:56:47 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:56:47 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:56:48 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:56:57 | D | - iter = [ 0, 1, 2, 3] +25-08-31 11:56:57 | D | - error = [28785.2862, 28279.0404, 28067.1880, 28269.2076] +25-08-31 11:56:57 | D | - best error = [28785.2862, 28279.0404, 28067.1880, 28067.1880] +25-08-31 11:56:57 | D | + Adding low-rank branches to single_transformer_blocks.23.attn.to_q, single_transformer_blocks.23.attn.to_k, single_transformer_blocks.23.attn.to_v +25-08-31 11:56:57 | D | - Calibrating low-rank branch for single_transformer_blocks.23.proj_out.linears.0 +25-08-31 11:56:57 | D | + w: sfp4_e2m1_all +25-08-31 11:56:57 | D | + x: sfp4_e2m1_all +25-08-31 11:56:57 | D | + y: None +25-08-31 11:56:57 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:56:57 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:56:57 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:56:58 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:57:13 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:57:13 | D | - error = [ 2753.4845, 2727.7953, 2710.6606, 2700.6913, 2688.0625, 2681.0619, 2677.7694, 2674.3902, 2669.6809, 2666.8319] +25-08-31 11:57:13 | D | - best error = [ 2753.4845, 2727.7953, 2710.6606, 2700.6913, 2688.0625, 2681.0619, 2677.7694, 2674.3902, 2669.6809, 2666.8319] +25-08-31 11:57:28 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:57:28 | D | - error = [ 2662.1971, 2660.8435, 2657.2550, 2651.7887, 2649.1591, 2646.6821, 2646.1335, 2645.6690, 2643.2512, 2640.6726] +25-08-31 11:57:28 | D | - best error = [ 2662.1971, 2660.8435, 2657.2550, 2651.7887, 2649.1591, 2646.6821, 2646.1335, 2645.6690, 2643.2512, 2640.6726] +25-08-31 11:57:31 | D | - iter = [ 20, 21] +25-08-31 11:57:31 | D | - error = [ 2640.1127, 2640.5143] +25-08-31 11:57:31 | D | - best error = [ 2640.1127, 2640.1127] +25-08-31 11:57:31 | D | + Adding low-rank branches to single_transformer_blocks.23.proj_out.linears.0 +25-08-31 11:57:31 | D | - Calibrating low-rank branch for single_transformer_blocks.23.proj_mlp +25-08-31 11:57:31 | D | + w: sfp4_e2m1_all +25-08-31 11:57:31 | D | + x: sfp4_e2m1_all +25-08-31 11:57:31 | D | + y: None +25-08-31 11:57:31 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:57:31 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:57:31 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:57:33 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:57:52 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 11:57:52 | D | - error = [ 4379.0168, 4353.8996, 4336.0507, 4323.2867, 4312.7063, 4304.9167, 4298.5719, 4292.9533, 4299.0818] +25-08-31 11:57:52 | D | - best error = [ 4379.0168, 4353.8996, 4336.0507, 4323.2867, 4312.7063, 4304.9167, 4298.5719, 4292.9533, 4292.9533] +25-08-31 11:57:52 | D | + Adding low-rank branches to single_transformer_blocks.23.proj_mlp +25-08-31 11:57:52 | D | - Calibrating low-rank branch for single_transformer_blocks.23.proj_out.linears.1 +25-08-31 11:57:52 | D | + w: sfp4_e2m1_all +25-08-31 11:57:52 | D | + x: sfp4_e2m1_all +25-08-31 11:57:52 | D | + y: None +25-08-31 11:57:52 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:57:52 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:57:52 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:57:57 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:58:29 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:58:29 | D | - error = [ 5122.7752, 5087.6930, 5062.3342, 5039.5712, 5024.8081, 5013.1906, 4997.9746, 4990.9284, 4979.7482, 4971.9718] +25-08-31 11:58:29 | D | - best error = [ 5122.7752, 5087.6930, 5062.3342, 5039.5712, 5024.8081, 5013.1906, 4997.9746, 4990.9284, 4979.7482, 4971.9718] +25-08-31 11:59:01 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 11:59:01 | D | - error = [ 4966.7527, 4958.0173, 4954.1409, 4949.6256, 4944.6213, 4944.4656, 4940.6002, 4936.5960, 4934.6088, 4931.7672] +25-08-31 11:59:01 | D | - best error = [ 4966.7527, 4958.0173, 4954.1409, 4949.6256, 4944.6213, 4944.4656, 4940.6002, 4936.5960, 4934.6088, 4931.7672] +25-08-31 11:59:13 | D | - iter = [ 20, 21, 22, 23] +25-08-31 11:59:13 | D | - error = [ 4930.0391, 4929.1758, 4925.6973, 4926.4072] +25-08-31 11:59:13 | D | - best error = [ 4930.0391, 4929.1758, 4925.6973, 4925.6973] +25-08-31 11:59:13 | D | + Adding low-rank branches to single_transformer_blocks.23.proj_out.linears.1 +25-08-31 11:59:31 | D | - Calibrating low-rank branches of block single_transformer_blocks.24 +25-08-31 11:59:31 | D | - Calibrating low-rank branch for single_transformer_blocks.24.attn.to_q, single_transformer_blocks.24.attn.to_k, single_transformer_blocks.24.attn.to_v +25-08-31 11:59:31 | D | + w: sfp4_e2m1_all +25-08-31 11:59:31 | D | + x: sfp4_e2m1_all +25-08-31 11:59:31 | D | + y: None +25-08-31 11:59:31 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:59:31 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:59:31 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:59:32 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:59:41 | D | - iter = [ 0, 1, 2, 3] +25-08-31 11:59:41 | D | - error = [26280.7026, 25885.3052, 25588.1935, 25813.2691] +25-08-31 11:59:41 | D | - best error = [26280.7026, 25885.3052, 25588.1935, 25588.1935] +25-08-31 11:59:41 | D | + Adding low-rank branches to single_transformer_blocks.24.attn.to_q, single_transformer_blocks.24.attn.to_k, single_transformer_blocks.24.attn.to_v +25-08-31 11:59:41 | D | - Calibrating low-rank branch for single_transformer_blocks.24.proj_out.linears.0 +25-08-31 11:59:41 | D | + w: sfp4_e2m1_all +25-08-31 11:59:41 | D | + x: sfp4_e2m1_all +25-08-31 11:59:41 | D | + y: None +25-08-31 11:59:41 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 11:59:41 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 11:59:41 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 11:59:42 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 11:59:58 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 11:59:58 | D | - error = [ 2333.3981, 2317.9181, 2300.1015, 2289.6632, 2284.4688, 2275.0424, 2269.0258, 2263.7989, 2258.3266, 2251.3850] +25-08-31 11:59:58 | D | - best error = [ 2333.3981, 2317.9181, 2300.1015, 2289.6632, 2284.4688, 2275.0424, 2269.0258, 2263.7989, 2258.3266, 2251.3850] +25-08-31 12:00:05 | D | - iter = [ 10, 11, 12, 13, 14] +25-08-31 12:00:05 | D | - error = [ 2248.3478, 2247.8536, 2244.5266, 2242.4455, 2244.0406] +25-08-31 12:00:05 | D | - best error = [ 2248.3478, 2247.8536, 2244.5266, 2242.4455, 2242.4455] +25-08-31 12:00:05 | D | + Adding low-rank branches to single_transformer_blocks.24.proj_out.linears.0 +25-08-31 12:00:05 | D | - Calibrating low-rank branch for single_transformer_blocks.24.proj_mlp +25-08-31 12:00:05 | D | + w: sfp4_e2m1_all +25-08-31 12:00:05 | D | + x: sfp4_e2m1_all +25-08-31 12:00:05 | D | + y: None +25-08-31 12:00:05 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:00:05 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:00:05 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:00:07 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:00:28 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:00:28 | D | - error = [ 4127.6688, 4107.6529, 4094.3543, 4085.4690, 4078.1539, 4072.2655, 4066.7074, 4062.7561, 4059.0716, 4055.9973] +25-08-31 12:00:28 | D | - best error = [ 4127.6688, 4107.6529, 4094.3543, 4085.4690, 4078.1539, 4072.2655, 4066.7074, 4062.7561, 4059.0716, 4055.9973] +25-08-31 12:00:45 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17] +25-08-31 12:00:45 | D | - error = [ 4053.1906, 4050.7947, 4047.9662, 4045.8197, 4042.9100, 4041.4612, 4039.8613, 4047.8880] +25-08-31 12:00:45 | D | - best error = [ 4053.1906, 4050.7947, 4047.9662, 4045.8197, 4042.9100, 4041.4612, 4039.8613, 4039.8613] +25-08-31 12:00:45 | D | + Adding low-rank branches to single_transformer_blocks.24.proj_mlp +25-08-31 12:00:45 | D | - Calibrating low-rank branch for single_transformer_blocks.24.proj_out.linears.1 +25-08-31 12:00:45 | D | + w: sfp4_e2m1_all +25-08-31 12:00:45 | D | + x: sfp4_e2m1_all +25-08-31 12:00:45 | D | + y: None +25-08-31 12:00:45 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:00:45 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:00:45 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:00:50 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:01:21 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:01:21 | D | - error = [ 5334.7529, 5292.8575, 5267.0627, 5240.9423, 5219.9153, 5210.1061, 5198.0816, 5190.1306, 5180.6890, 5178.3466] +25-08-31 12:01:21 | D | - best error = [ 5334.7529, 5292.8575, 5267.0627, 5240.9423, 5219.9153, 5210.1061, 5198.0816, 5190.1306, 5180.6890, 5178.3466] +25-08-31 12:01:54 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 12:01:54 | D | - error = [ 5172.3155, 5165.7013, 5155.2882, 5154.5083, 5152.3016, 5148.7964, 5144.7507, 5140.3580, 5138.4266, 5138.9828] +25-08-31 12:01:54 | D | - best error = [ 5172.3155, 5165.7013, 5155.2882, 5154.5083, 5152.3016, 5148.7964, 5144.7507, 5140.3580, 5138.4266, 5138.4266] +25-08-31 12:01:55 | D | + Adding low-rank branches to single_transformer_blocks.24.proj_out.linears.1 +25-08-31 12:02:12 | D | - Calibrating low-rank branches of block single_transformer_blocks.25 +25-08-31 12:02:12 | D | - Calibrating low-rank branch for single_transformer_blocks.25.attn.to_q, single_transformer_blocks.25.attn.to_k, single_transformer_blocks.25.attn.to_v +25-08-31 12:02:12 | D | + w: sfp4_e2m1_all +25-08-31 12:02:12 | D | + x: sfp4_e2m1_all +25-08-31 12:02:12 | D | + y: None +25-08-31 12:02:12 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:02:12 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:02:12 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:02:14 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:02:36 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:02:36 | D | - error = [37638.6508, 37249.2440, 36947.3589, 36579.4487, 36423.2629, 36353.5229, 36219.3478, 36091.2586, 36041.6293, 36080.5641] +25-08-31 12:02:36 | D | - best error = [37638.6508, 37249.2440, 36947.3589, 36579.4487, 36423.2629, 36353.5229, 36219.3478, 36091.2586, 36041.6293, 36041.6293] +25-08-31 12:02:36 | D | + Adding low-rank branches to single_transformer_blocks.25.attn.to_q, single_transformer_blocks.25.attn.to_k, single_transformer_blocks.25.attn.to_v +25-08-31 12:02:36 | D | - Calibrating low-rank branch for single_transformer_blocks.25.proj_out.linears.0 +25-08-31 12:02:36 | D | + w: sfp4_e2m1_all +25-08-31 12:02:36 | D | + x: sfp4_e2m1_all +25-08-31 12:02:36 | D | + y: None +25-08-31 12:02:36 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:02:36 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:02:36 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:02:37 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:02:49 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 12:02:49 | D | - error = [ 2471.0543, 2446.5559, 2430.4562, 2417.2624, 2409.1217, 2402.4793, 2397.3947, 2400.9581] +25-08-31 12:02:49 | D | - best error = [ 2471.0543, 2446.5559, 2430.4562, 2417.2624, 2409.1217, 2402.4793, 2397.3947, 2397.3947] +25-08-31 12:02:49 | D | + Adding low-rank branches to single_transformer_blocks.25.proj_out.linears.0 +25-08-31 12:02:50 | D | - Calibrating low-rank branch for single_transformer_blocks.25.proj_mlp +25-08-31 12:02:50 | D | + w: sfp4_e2m1_all +25-08-31 12:02:50 | D | + x: sfp4_e2m1_all +25-08-31 12:02:50 | D | + y: None +25-08-31 12:02:50 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:02:50 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:02:50 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:02:51 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:03:12 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:03:12 | D | - error = [ 3976.7697, 3957.7960, 3946.0782, 3937.6865, 3930.3878, 3924.0088, 3919.8385, 3916.0937, 3912.9201, 3909.5271] +25-08-31 12:03:12 | D | - best error = [ 3976.7697, 3957.7960, 3946.0782, 3937.6865, 3930.3878, 3924.0088, 3919.8385, 3916.0937, 3912.9201, 3909.5271] +25-08-31 12:03:33 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 12:03:33 | D | - error = [ 3907.2970, 3905.0045, 3902.8023, 3900.7636, 3898.9065, 3898.0300, 3896.5800, 3895.5866, 3894.3183, 3893.6907] +25-08-31 12:03:33 | D | - best error = [ 3907.2970, 3905.0045, 3902.8023, 3900.7636, 3898.9065, 3898.0300, 3896.5800, 3895.5866, 3894.3183, 3893.6907] +25-08-31 12:03:55 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 12:03:55 | D | - error = [ 3892.3716, 3892.0478, 3890.6708, 3889.9806, 3888.5911, 3887.6448, 3886.9012, 3886.5323, 3885.6011, 3885.6261] +25-08-31 12:03:55 | D | - best error = [ 3892.3716, 3892.0478, 3890.6708, 3889.9806, 3888.5911, 3887.6448, 3886.9012, 3886.5323, 3885.6011, 3885.6011] +25-08-31 12:03:55 | D | + Adding low-rank branches to single_transformer_blocks.25.proj_mlp +25-08-31 12:03:56 | D | - Calibrating low-rank branch for single_transformer_blocks.25.proj_out.linears.1 +25-08-31 12:03:56 | D | + w: sfp4_e2m1_all +25-08-31 12:03:56 | D | + x: sfp4_e2m1_all +25-08-31 12:03:56 | D | + y: None +25-08-31 12:03:56 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:03:56 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:03:56 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:04:00 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:04:33 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:04:33 | D | - error = [ 5546.4863, 5498.0640, 5467.8305, 5444.6183, 5422.2300, 5411.5872, 5399.9809, 5385.3100, 5377.2088, 5372.1184] +25-08-31 12:04:33 | D | - best error = [ 5546.4863, 5498.0640, 5467.8305, 5444.6183, 5422.2300, 5411.5872, 5399.9809, 5385.3100, 5377.2088, 5372.1184] +25-08-31 12:05:02 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18] +25-08-31 12:05:02 | D | - error = [ 5364.0201, 5354.9192, 5350.8444, 5348.1610, 5343.2005, 5340.1957, 5333.6997, 5332.5791, 5335.3605] +25-08-31 12:05:02 | D | - best error = [ 5364.0201, 5354.9192, 5350.8444, 5348.1610, 5343.2005, 5340.1957, 5333.6997, 5332.5791, 5332.5791] +25-08-31 12:05:02 | D | + Adding low-rank branches to single_transformer_blocks.25.proj_out.linears.1 +25-08-31 12:05:20 | D | - Calibrating low-rank branches of block single_transformer_blocks.26 +25-08-31 12:05:20 | D | - Calibrating low-rank branch for single_transformer_blocks.26.attn.to_q, single_transformer_blocks.26.attn.to_k, single_transformer_blocks.26.attn.to_v +25-08-31 12:05:20 | D | + w: sfp4_e2m1_all +25-08-31 12:05:20 | D | + x: sfp4_e2m1_all +25-08-31 12:05:20 | D | + y: None +25-08-31 12:05:20 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:05:20 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:05:20 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:05:21 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:05:44 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:05:44 | D | - error = [31443.2731, 30813.7164, 30598.4784, 30389.4045, 30192.3964, 30078.4106, 30029.8806, 29882.1001, 29760.0054, 29694.2941] +25-08-31 12:05:44 | D | - best error = [31443.2731, 30813.7164, 30598.4784, 30389.4045, 30192.3964, 30078.4106, 30029.8806, 29882.1001, 29760.0054, 29694.2941] +25-08-31 12:05:48 | D | - iter = [ 10, 11] +25-08-31 12:05:48 | D | - error = [29690.3928, 29810.9886] +25-08-31 12:05:48 | D | - best error = [29690.3928, 29690.3928] +25-08-31 12:05:48 | D | + Adding low-rank branches to single_transformer_blocks.26.attn.to_q, single_transformer_blocks.26.attn.to_k, single_transformer_blocks.26.attn.to_v +25-08-31 12:05:49 | D | - Calibrating low-rank branch for single_transformer_blocks.26.proj_out.linears.0 +25-08-31 12:05:49 | D | + w: sfp4_e2m1_all +25-08-31 12:05:49 | D | + x: sfp4_e2m1_all +25-08-31 12:05:49 | D | + y: None +25-08-31 12:05:49 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:05:49 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:05:49 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:05:50 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:06:04 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 12:06:04 | D | - error = [ 2592.4692, 2555.6856, 2533.2275, 2514.6370, 2511.0148, 2499.2819, 2491.8049, 2486.5708, 2491.9886] +25-08-31 12:06:04 | D | - best error = [ 2592.4692, 2555.6856, 2533.2275, 2514.6370, 2511.0148, 2499.2819, 2491.8049, 2486.5708, 2486.5708] +25-08-31 12:06:04 | D | + Adding low-rank branches to single_transformer_blocks.26.proj_out.linears.0 +25-08-31 12:06:04 | D | - Calibrating low-rank branch for single_transformer_blocks.26.proj_mlp +25-08-31 12:06:04 | D | + w: sfp4_e2m1_all +25-08-31 12:06:04 | D | + x: sfp4_e2m1_all +25-08-31 12:06:04 | D | + y: None +25-08-31 12:06:04 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:06:04 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:06:04 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:06:06 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:06:27 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:06:27 | D | - error = [ 4128.3348, 4110.6430, 4093.7892, 4082.3393, 4074.7890, 4068.1824, 4062.5115, 4057.6088, 4054.3104, 4051.3800] +25-08-31 12:06:27 | D | - best error = [ 4128.3348, 4110.6430, 4093.7892, 4082.3393, 4074.7890, 4068.1824, 4062.5115, 4057.6088, 4054.3104, 4051.3800] +25-08-31 12:06:38 | D | - iter = [ 10, 11, 12, 13, 14] +25-08-31 12:06:38 | D | - error = [ 4048.4646, 4045.2372, 4042.9001, 4041.1691, 4044.5178] +25-08-31 12:06:38 | D | - best error = [ 4048.4646, 4045.2372, 4042.9001, 4041.1691, 4041.1691] +25-08-31 12:06:38 | D | + Adding low-rank branches to single_transformer_blocks.26.proj_mlp +25-08-31 12:06:39 | D | - Calibrating low-rank branch for single_transformer_blocks.26.proj_out.linears.1 +25-08-31 12:06:39 | D | + w: sfp4_e2m1_all +25-08-31 12:06:39 | D | + x: sfp4_e2m1_all +25-08-31 12:06:39 | D | + y: None +25-08-31 12:06:39 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:06:39 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:06:39 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:06:43 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:07:15 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:07:15 | D | - error = [ 5447.8656, 5399.5552, 5367.1287, 5337.9179, 5318.0184, 5304.9914, 5292.8110, 5283.1680, 5272.5925, 5258.4260] +25-08-31 12:07:15 | D | - best error = [ 5447.8656, 5399.5552, 5367.1287, 5337.9179, 5318.0184, 5304.9914, 5292.8110, 5283.1680, 5272.5925, 5258.4260] +25-08-31 12:07:25 | D | - iter = [ 10, 11, 12] +25-08-31 12:07:25 | D | - error = [ 5254.8964, 5249.0529, 5256.3440] +25-08-31 12:07:25 | D | - best error = [ 5254.8964, 5249.0529, 5249.0529] +25-08-31 12:07:25 | D | + Adding low-rank branches to single_transformer_blocks.26.proj_out.linears.1 +25-08-31 12:07:43 | D | - Calibrating low-rank branches of block single_transformer_blocks.27 +25-08-31 12:07:43 | D | - Calibrating low-rank branch for single_transformer_blocks.27.attn.to_q, single_transformer_blocks.27.attn.to_k, single_transformer_blocks.27.attn.to_v +25-08-31 12:07:43 | D | + w: sfp4_e2m1_all +25-08-31 12:07:43 | D | + x: sfp4_e2m1_all +25-08-31 12:07:43 | D | + y: None +25-08-31 12:07:43 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:07:43 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:07:43 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:07:45 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:08:04 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 12:08:04 | D | - error = [43876.6300, 43151.1709, 42543.5668, 42283.5367, 42129.2138, 42112.2067, 41893.8483, 41742.7735, 41743.2549] +25-08-31 12:08:04 | D | - best error = [43876.6300, 43151.1709, 42543.5668, 42283.5367, 42129.2138, 42112.2067, 41893.8483, 41742.7735, 41742.7735] +25-08-31 12:08:04 | D | + Adding low-rank branches to single_transformer_blocks.27.attn.to_q, single_transformer_blocks.27.attn.to_k, single_transformer_blocks.27.attn.to_v +25-08-31 12:08:05 | D | - Calibrating low-rank branch for single_transformer_blocks.27.proj_out.linears.0 +25-08-31 12:08:05 | D | + w: sfp4_e2m1_all +25-08-31 12:08:05 | D | + x: sfp4_e2m1_all +25-08-31 12:08:05 | D | + y: None +25-08-31 12:08:05 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:08:05 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:08:05 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:08:05 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:08:21 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:08:21 | D | - error = [ 1665.0829, 1638.2379, 1621.5788, 1611.5126, 1604.3789, 1597.4353, 1594.7767, 1591.7341, 1587.7033, 1586.8714] +25-08-31 12:08:21 | D | - best error = [ 1665.0829, 1638.2379, 1621.5788, 1611.5126, 1604.3789, 1597.4353, 1594.7767, 1591.7341, 1587.7033, 1586.8714] +25-08-31 12:08:24 | D | - iter = [ 10, 11] +25-08-31 12:08:24 | D | - error = [ 1584.4360, 1584.5936] +25-08-31 12:08:24 | D | - best error = [ 1584.4360, 1584.4360] +25-08-31 12:08:24 | D | + Adding low-rank branches to single_transformer_blocks.27.proj_out.linears.0 +25-08-31 12:08:24 | D | - Calibrating low-rank branch for single_transformer_blocks.27.proj_mlp +25-08-31 12:08:24 | D | + w: sfp4_e2m1_all +25-08-31 12:08:24 | D | + x: sfp4_e2m1_all +25-08-31 12:08:24 | D | + y: None +25-08-31 12:08:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:08:24 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:08:24 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:08:26 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:08:38 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-31 12:08:38 | D | - error = [ 4333.3135, 4303.5248, 4286.5721, 4273.1628, 4263.7523, 4265.8633] +25-08-31 12:08:38 | D | - best error = [ 4333.3135, 4303.5248, 4286.5721, 4273.1628, 4263.7523, 4263.7523] +25-08-31 12:08:38 | D | + Adding low-rank branches to single_transformer_blocks.27.proj_mlp +25-08-31 12:08:39 | D | - Calibrating low-rank branch for single_transformer_blocks.27.proj_out.linears.1 +25-08-31 12:08:39 | D | + w: sfp4_e2m1_all +25-08-31 12:08:39 | D | + x: sfp4_e2m1_all +25-08-31 12:08:39 | D | + y: None +25-08-31 12:08:39 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:08:39 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:08:39 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:08:43 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:09:01 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-31 12:09:01 | D | - error = [ 4733.9409, 4693.4904, 4663.6923, 4643.4547, 4629.9812, 4637.0564] +25-08-31 12:09:01 | D | - best error = [ 4733.9409, 4693.4904, 4663.6923, 4643.4547, 4629.9812, 4629.9812] +25-08-31 12:09:02 | D | + Adding low-rank branches to single_transformer_blocks.27.proj_out.linears.1 +25-08-31 12:09:19 | D | - Calibrating low-rank branches of block single_transformer_blocks.28 +25-08-31 12:09:19 | D | - Calibrating low-rank branch for single_transformer_blocks.28.attn.to_q, single_transformer_blocks.28.attn.to_k, single_transformer_blocks.28.attn.to_v +25-08-31 12:09:19 | D | + w: sfp4_e2m1_all +25-08-31 12:09:19 | D | + x: sfp4_e2m1_all +25-08-31 12:09:19 | D | + y: None +25-08-31 12:09:19 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:09:19 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:09:19 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:09:21 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:09:36 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-31 12:09:36 | D | - error = [43321.7248, 42428.0948, 41987.5425, 41566.2648, 41480.7383, 41252.9151, 41285.4570] +25-08-31 12:09:36 | D | - best error = [43321.7248, 42428.0948, 41987.5425, 41566.2648, 41480.7383, 41252.9151, 41252.9151] +25-08-31 12:09:36 | D | + Adding low-rank branches to single_transformer_blocks.28.attn.to_q, single_transformer_blocks.28.attn.to_k, single_transformer_blocks.28.attn.to_v +25-08-31 12:09:37 | D | - Calibrating low-rank branch for single_transformer_blocks.28.proj_out.linears.0 +25-08-31 12:09:37 | D | + w: sfp4_e2m1_all +25-08-31 12:09:37 | D | + x: sfp4_e2m1_all +25-08-31 12:09:37 | D | + y: None +25-08-31 12:09:37 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:09:37 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:09:37 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:09:37 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:09:53 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:09:53 | D | - error = [ 1640.2785, 1625.4375, 1614.0110, 1606.1977, 1600.0252, 1598.6820, 1596.1164, 1592.9915, 1591.9672, 1591.2187] +25-08-31 12:09:53 | D | - best error = [ 1640.2785, 1625.4375, 1614.0110, 1606.1977, 1600.0252, 1598.6820, 1596.1164, 1592.9915, 1591.9672, 1591.2187] +25-08-31 12:09:56 | D | - iter = [ 10, 11] +25-08-31 12:09:56 | D | - error = [ 1589.3393, 1589.9780] +25-08-31 12:09:56 | D | - best error = [ 1589.3393, 1589.3393] +25-08-31 12:09:56 | D | + Adding low-rank branches to single_transformer_blocks.28.proj_out.linears.0 +25-08-31 12:09:56 | D | - Calibrating low-rank branch for single_transformer_blocks.28.proj_mlp +25-08-31 12:09:56 | D | + w: sfp4_e2m1_all +25-08-31 12:09:56 | D | + x: sfp4_e2m1_all +25-08-31 12:09:56 | D | + y: None +25-08-31 12:09:56 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:09:56 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:09:56 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:09:58 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:10:19 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:10:19 | D | - error = [ 4336.6201, 4310.0624, 4293.6700, 4281.2825, 4272.7857, 4266.1191, 4259.6944, 4254.7080, 4251.1104, 4247.4760] +25-08-31 12:10:19 | D | - best error = [ 4336.6201, 4310.0624, 4293.6700, 4281.2825, 4272.7857, 4266.1191, 4259.6944, 4254.7080, 4251.1104, 4247.4760] +25-08-31 12:10:40 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 12:10:40 | D | - error = [ 4244.7671, 4241.5389, 4239.8703, 4238.2893, 4235.9504, 4234.5751, 4232.3506, 4230.7115, 4229.4243, 4228.6806] +25-08-31 12:10:40 | D | - best error = [ 4244.7671, 4241.5389, 4239.8703, 4238.2893, 4235.9504, 4234.5751, 4232.3506, 4230.7115, 4229.4243, 4228.6806] +25-08-31 12:11:02 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 12:11:02 | D | - error = [ 4228.0553, 4226.8653, 4225.3620, 4224.3797, 4223.3333, 4222.9743, 4221.5047, 4220.9383, 4219.9735, 4219.5682] +25-08-31 12:11:02 | D | - best error = [ 4228.0553, 4226.8653, 4225.3620, 4224.3797, 4223.3333, 4222.9743, 4221.5047, 4220.9383, 4219.9735, 4219.5682] +25-08-31 12:11:23 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-31 12:11:23 | D | - error = [ 4218.2243, 4217.4691, 4216.4267, 4215.9061, 4215.0274, 4214.2197, 4213.4395, 4212.8876, 4211.8884, 4210.8322] +25-08-31 12:11:23 | D | - best error = [ 4218.2243, 4217.4691, 4216.4267, 4215.9061, 4215.0274, 4214.2197, 4213.4395, 4212.8876, 4211.8884, 4210.8322] +25-08-31 12:11:34 | D | - iter = [ 40, 41, 42, 43, 44] +25-08-31 12:11:34 | D | - error = [ 4210.7681, 4210.5408, 4209.8485, 4209.4833, 4209.9140] +25-08-31 12:11:34 | D | - best error = [ 4210.7681, 4210.5408, 4209.8485, 4209.4833, 4209.4833] +25-08-31 12:11:35 | D | + Adding low-rank branches to single_transformer_blocks.28.proj_mlp +25-08-31 12:11:35 | D | - Calibrating low-rank branch for single_transformer_blocks.28.proj_out.linears.1 +25-08-31 12:11:35 | D | + w: sfp4_e2m1_all +25-08-31 12:11:35 | D | + x: sfp4_e2m1_all +25-08-31 12:11:35 | D | + y: None +25-08-31 12:11:35 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:11:35 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:11:35 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:11:40 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:12:11 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:12:11 | D | - error = [ 3609.2066, 3580.7871, 3561.1825, 3540.0880, 3532.8821, 3521.1535, 3513.3147, 3507.0971, 3501.9569, 3502.1318] +25-08-31 12:12:11 | D | - best error = [ 3609.2066, 3580.7871, 3561.1825, 3540.0880, 3532.8821, 3521.1535, 3513.3147, 3507.0971, 3501.9569, 3501.9569] +25-08-31 12:12:11 | D | + Adding low-rank branches to single_transformer_blocks.28.proj_out.linears.1 +25-08-31 12:12:29 | D | - Calibrating low-rank branches of block single_transformer_blocks.29 +25-08-31 12:12:29 | D | - Calibrating low-rank branch for single_transformer_blocks.29.attn.to_q, single_transformer_blocks.29.attn.to_k, single_transformer_blocks.29.attn.to_v +25-08-31 12:12:29 | D | + w: sfp4_e2m1_all +25-08-31 12:12:29 | D | + x: sfp4_e2m1_all +25-08-31 12:12:29 | D | + y: None +25-08-31 12:12:29 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:12:29 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:12:29 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:12:30 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:12:52 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:12:52 | D | - error = [35356.4994, 34841.6303, 34370.2178, 34196.2075, 34159.6515, 33884.6948, 33831.8761, 33785.8328, 33725.4743, 33546.1702] +25-08-31 12:12:52 | D | - best error = [35356.4994, 34841.6303, 34370.2178, 34196.2075, 34159.6515, 33884.6948, 33831.8761, 33785.8328, 33725.4743, 33546.1702] +25-08-31 12:13:05 | D | - iter = [ 10, 11, 12, 13, 14, 15] +25-08-31 12:13:05 | D | - error = [33363.6832, 33300.9107, 33174.3747, 32984.1368, 32932.5264, 32986.6514] +25-08-31 12:13:05 | D | - best error = [33363.6832, 33300.9107, 33174.3747, 32984.1368, 32932.5264, 32932.5264] +25-08-31 12:13:06 | D | + Adding low-rank branches to single_transformer_blocks.29.attn.to_q, single_transformer_blocks.29.attn.to_k, single_transformer_blocks.29.attn.to_v +25-08-31 12:13:06 | D | - Calibrating low-rank branch for single_transformer_blocks.29.proj_out.linears.0 +25-08-31 12:13:06 | D | + w: sfp4_e2m1_all +25-08-31 12:13:06 | D | + x: sfp4_e2m1_all +25-08-31 12:13:06 | D | + y: None +25-08-31 12:13:06 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:13:06 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:13:06 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:13:07 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:13:23 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:13:23 | D | - error = [ 2503.7672, 2479.3480, 2466.3561, 2452.7722, 2443.9436, 2434.5577, 2428.3284, 2419.9624, 2416.9468, 2411.6606] +25-08-31 12:13:23 | D | - best error = [ 2503.7672, 2479.3480, 2466.3561, 2452.7722, 2443.9436, 2434.5577, 2428.3284, 2419.9624, 2416.9468, 2411.6606] +25-08-31 12:13:28 | D | - iter = [ 10, 11, 12, 13] +25-08-31 12:13:28 | D | - error = [ 2409.1050, 2408.4564, 2402.8219, 2406.1631] +25-08-31 12:13:28 | D | - best error = [ 2409.1050, 2408.4564, 2402.8219, 2402.8219] +25-08-31 12:13:29 | D | + Adding low-rank branches to single_transformer_blocks.29.proj_out.linears.0 +25-08-31 12:13:29 | D | - Calibrating low-rank branch for single_transformer_blocks.29.proj_mlp +25-08-31 12:13:29 | D | + w: sfp4_e2m1_all +25-08-31 12:13:29 | D | + x: sfp4_e2m1_all +25-08-31 12:13:29 | D | + y: None +25-08-31 12:13:29 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:13:29 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:13:29 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:13:31 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:13:52 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:13:52 | D | - error = [ 4190.8935, 4171.5570, 4159.2461, 4150.0251, 4142.7911, 4135.7217, 4131.0430, 4127.2776, 4123.4980, 4120.2291] +25-08-31 12:13:52 | D | - best error = [ 4190.8935, 4171.5570, 4159.2461, 4150.0251, 4142.7911, 4135.7217, 4131.0430, 4127.2776, 4123.4980, 4120.2291] +25-08-31 12:14:14 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 12:14:14 | D | - error = [ 4117.4215, 4116.1681, 4114.0705, 4112.2082, 4110.9198, 4109.3372, 4108.1967, 4106.8669, 4106.2623, 4105.5579] +25-08-31 12:14:14 | D | - best error = [ 4117.4215, 4116.1681, 4114.0705, 4112.2082, 4110.9198, 4109.3372, 4108.1967, 4106.8669, 4106.2623, 4105.5579] +25-08-31 12:14:36 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 12:14:36 | D | - error = [ 4104.6286, 4103.9498, 4102.9446, 4102.1314, 4100.9831, 4100.0595, 4099.3090, 4098.0425, 4097.5634, 4097.0909] +25-08-31 12:14:36 | D | - best error = [ 4104.6286, 4103.9498, 4102.9446, 4102.1314, 4100.9831, 4100.0595, 4099.3090, 4098.0425, 4097.5634, 4097.0909] +25-08-31 12:14:58 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-31 12:14:58 | D | - error = [ 4096.6486, 4096.4502, 4095.6218, 4095.0452, 4094.3170, 4093.6172, 4093.0441, 4092.7238, 4092.4609, 4092.3767] +25-08-31 12:14:58 | D | - best error = [ 4096.6486, 4096.4502, 4095.6218, 4095.0452, 4094.3170, 4093.6172, 4093.0441, 4092.7238, 4092.4609, 4092.3767] +25-08-31 12:15:17 | D | - iter = [ 40, 41, 42, 43, 44, 45, 46, 47, 48] +25-08-31 12:15:17 | D | - error = [ 4091.8152, 4091.0417, 4090.4983, 4090.4338, 4090.2787, 4090.2494, 4089.5946, 4089.2948, 4089.5053] +25-08-31 12:15:17 | D | - best error = [ 4091.8152, 4091.0417, 4090.4983, 4090.4338, 4090.2787, 4090.2494, 4089.5946, 4089.2948, 4089.2948] +25-08-31 12:15:17 | D | + Adding low-rank branches to single_transformer_blocks.29.proj_mlp +25-08-31 12:15:17 | D | - Calibrating low-rank branch for single_transformer_blocks.29.proj_out.linears.1 +25-08-31 12:15:17 | D | + w: sfp4_e2m1_all +25-08-31 12:15:17 | D | + x: sfp4_e2m1_all +25-08-31 12:15:17 | D | + y: None +25-08-31 12:15:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:15:17 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:15:17 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:15:22 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:15:53 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:15:53 | D | - error = [ 3933.0775, 3906.1530, 3883.2346, 3865.0074, 3855.1337, 3845.9596, 3836.8101, 3828.5142, 3821.0902, 3814.6352] +25-08-31 12:15:53 | D | - best error = [ 3933.0775, 3906.1530, 3883.2346, 3865.0074, 3855.1337, 3845.9596, 3836.8101, 3828.5142, 3821.0902, 3814.6352] +25-08-31 12:16:00 | D | - iter = [ 10, 11] +25-08-31 12:16:00 | D | - error = [ 3813.2047, 3814.2495] +25-08-31 12:16:00 | D | - best error = [ 3813.2047, 3813.2047] +25-08-31 12:16:01 | D | + Adding low-rank branches to single_transformer_blocks.29.proj_out.linears.1 +25-08-31 12:16:18 | D | - Calibrating low-rank branches of block single_transformer_blocks.30 +25-08-31 12:16:18 | D | - Calibrating low-rank branch for single_transformer_blocks.30.attn.to_q, single_transformer_blocks.30.attn.to_k, single_transformer_blocks.30.attn.to_v +25-08-31 12:16:18 | D | + w: sfp4_e2m1_all +25-08-31 12:16:18 | D | + x: sfp4_e2m1_all +25-08-31 12:16:18 | D | + y: None +25-08-31 12:16:18 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:16:18 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:16:18 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:16:20 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:16:35 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-31 12:16:35 | D | - error = [54713.5411, 53404.4744, 53253.7906, 52863.9031, 52631.6759, 52282.4648, 52339.3677] +25-08-31 12:16:35 | D | - best error = [54713.5411, 53404.4744, 53253.7906, 52863.9031, 52631.6759, 52282.4648, 52282.4648] +25-08-31 12:16:35 | D | + Adding low-rank branches to single_transformer_blocks.30.attn.to_q, single_transformer_blocks.30.attn.to_k, single_transformer_blocks.30.attn.to_v +25-08-31 12:16:35 | D | - Calibrating low-rank branch for single_transformer_blocks.30.proj_out.linears.0 +25-08-31 12:16:35 | D | + w: sfp4_e2m1_all +25-08-31 12:16:35 | D | + x: sfp4_e2m1_all +25-08-31 12:16:35 | D | + y: None +25-08-31 12:16:35 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:16:35 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:16:35 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:16:36 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:16:50 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 12:16:50 | D | - error = [ 2426.7888, 2410.2823, 2389.7403, 2375.4118, 2368.3621, 2360.5774, 2354.4648, 2347.2820, 2351.4079] +25-08-31 12:16:50 | D | - best error = [ 2426.7888, 2410.2823, 2389.7403, 2375.4118, 2368.3621, 2360.5774, 2354.4648, 2347.2820, 2347.2820] +25-08-31 12:16:50 | D | + Adding low-rank branches to single_transformer_blocks.30.proj_out.linears.0 +25-08-31 12:16:50 | D | - Calibrating low-rank branch for single_transformer_blocks.30.proj_mlp +25-08-31 12:16:50 | D | + w: sfp4_e2m1_all +25-08-31 12:16:50 | D | + x: sfp4_e2m1_all +25-08-31 12:16:50 | D | + y: None +25-08-31 12:16:50 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:16:50 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:16:50 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:16:52 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:17:13 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:17:13 | D | - error = [ 4277.1093, 4254.7691, 4239.2179, 4227.3226, 4219.1772, 4213.5318, 4206.8501, 4202.3365, 4198.6634, 4196.1397] +25-08-31 12:17:13 | D | - best error = [ 4277.1093, 4254.7691, 4239.2179, 4227.3226, 4219.1772, 4213.5318, 4206.8501, 4202.3365, 4198.6634, 4196.1397] +25-08-31 12:17:34 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 12:17:34 | D | - error = [ 4193.7918, 4191.2836, 4189.0810, 4186.2329, 4183.7321, 4182.5884, 4180.4349, 4178.1967, 4176.9752, 4176.1446] +25-08-31 12:17:34 | D | - best error = [ 4193.7918, 4191.2836, 4189.0810, 4186.2329, 4183.7321, 4182.5884, 4180.4349, 4178.1967, 4176.9752, 4176.1446] +25-08-31 12:17:56 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 12:17:56 | D | - error = [ 4174.6765, 4173.9054, 4172.5853, 4171.2408, 4170.0139, 4169.2995, 4169.0687, 4168.3508, 4167.7925, 4166.8633] +25-08-31 12:17:56 | D | - best error = [ 4174.6765, 4173.9054, 4172.5853, 4171.2408, 4170.0139, 4169.2995, 4169.0687, 4168.3508, 4167.7925, 4166.8633] +25-08-31 12:17:58 | D | - iter = [ 30] +25-08-31 12:17:58 | D | - error = [ 4166.9087] +25-08-31 12:17:58 | D | - best error = [ 4166.8633] +25-08-31 12:17:58 | D | + Adding low-rank branches to single_transformer_blocks.30.proj_mlp +25-08-31 12:17:59 | D | - Calibrating low-rank branch for single_transformer_blocks.30.proj_out.linears.1 +25-08-31 12:17:59 | D | + w: sfp4_e2m1_all +25-08-31 12:17:59 | D | + x: sfp4_e2m1_all +25-08-31 12:17:59 | D | + y: None +25-08-31 12:17:59 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:17:59 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:17:59 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:18:03 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:18:35 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:18:35 | D | - error = [ 4063.0026, 4039.5921, 4021.0412, 4007.7612, 3996.8307, 3987.8892, 3981.3979, 3972.8996, 3968.5734, 3962.0583] +25-08-31 12:18:35 | D | - best error = [ 4063.0026, 4039.5921, 4021.0412, 4007.7612, 3996.8307, 3987.8892, 3981.3979, 3972.8996, 3968.5734, 3962.0583] +25-08-31 12:18:51 | D | - iter = [ 10, 11, 12, 13, 14] +25-08-31 12:18:51 | D | - error = [ 3959.7347, 3954.4393, 3949.6225, 3945.9507, 3949.9681] +25-08-31 12:18:51 | D | - best error = [ 3959.7347, 3954.4393, 3949.6225, 3945.9507, 3945.9507] +25-08-31 12:18:51 | D | + Adding low-rank branches to single_transformer_blocks.30.proj_out.linears.1 +25-08-31 12:19:09 | D | - Calibrating low-rank branches of block single_transformer_blocks.31 +25-08-31 12:19:09 | D | - Calibrating low-rank branch for single_transformer_blocks.31.attn.to_q, single_transformer_blocks.31.attn.to_k, single_transformer_blocks.31.attn.to_v +25-08-31 12:19:09 | D | + w: sfp4_e2m1_all +25-08-31 12:19:09 | D | + x: sfp4_e2m1_all +25-08-31 12:19:09 | D | + y: None +25-08-31 12:19:09 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:19:09 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:19:09 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:19:11 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:19:33 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:19:33 | D | - error = [52103.6383, 51149.0997, 50736.7543, 50362.9426, 50055.9610, 49622.1189, 49580.5252, 49574.5173, 49471.6429, 49435.8290] +25-08-31 12:19:33 | D | - best error = [52103.6383, 51149.0997, 50736.7543, 50362.9426, 50055.9610, 49622.1189, 49580.5252, 49574.5173, 49471.6429, 49435.8290] +25-08-31 12:19:35 | D | - iter = [ 10] +25-08-31 12:19:35 | D | - error = [49450.0546] +25-08-31 12:19:35 | D | - best error = [49435.8290] +25-08-31 12:19:35 | D | + Adding low-rank branches to single_transformer_blocks.31.attn.to_q, single_transformer_blocks.31.attn.to_k, single_transformer_blocks.31.attn.to_v +25-08-31 12:19:35 | D | - Calibrating low-rank branch for single_transformer_blocks.31.proj_out.linears.0 +25-08-31 12:19:35 | D | + w: sfp4_e2m1_all +25-08-31 12:19:35 | D | + x: sfp4_e2m1_all +25-08-31 12:19:35 | D | + y: None +25-08-31 12:19:35 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:19:35 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:19:35 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:19:37 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:19:51 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:19:51 | D | - error = [ 2694.9736, 2664.8712, 2652.9110, 2635.6141, 2635.6132, 2624.4250, 2618.2632, 2614.2664, 2608.8139, 2605.0125] +25-08-31 12:19:51 | D | - best error = [ 2694.9736, 2664.8712, 2652.9110, 2635.6141, 2635.6132, 2624.4250, 2618.2632, 2614.2664, 2608.8139, 2605.0125] +25-08-31 12:20:01 | D | - iter = [ 10, 11, 12, 13, 14, 15] +25-08-31 12:20:01 | D | - error = [ 2604.0137, 2603.3574, 2601.3467, 2599.8487, 2595.4003, 2595.8758] +25-08-31 12:20:01 | D | - best error = [ 2604.0137, 2603.3574, 2601.3467, 2599.8487, 2595.4003, 2595.4003] +25-08-31 12:20:01 | D | + Adding low-rank branches to single_transformer_blocks.31.proj_out.linears.0 +25-08-31 12:20:01 | D | - Calibrating low-rank branch for single_transformer_blocks.31.proj_mlp +25-08-31 12:20:01 | D | + w: sfp4_e2m1_all +25-08-31 12:20:01 | D | + x: sfp4_e2m1_all +25-08-31 12:20:01 | D | + y: None +25-08-31 12:20:01 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:20:01 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:20:01 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:20:03 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:20:25 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:20:25 | D | - error = [ 3304.1353, 3290.4794, 3280.8749, 3273.7895, 3268.4544, 3264.7553, 3261.3322, 3258.3069, 3255.2098, 3253.1185] +25-08-31 12:20:25 | D | - best error = [ 3304.1353, 3290.4794, 3280.8749, 3273.7895, 3268.4544, 3264.7553, 3261.3322, 3258.3069, 3255.2098, 3253.1185] +25-08-31 12:20:46 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 12:20:46 | D | - error = [ 3251.6158, 3249.7848, 3248.7442, 3247.4265, 3245.8262, 3244.8527, 3243.7717, 3243.2866, 3242.4529, 3241.6929] +25-08-31 12:20:46 | D | - best error = [ 3251.6158, 3249.7848, 3248.7442, 3247.4265, 3245.8262, 3244.8527, 3243.7717, 3243.2866, 3242.4529, 3241.6929] +25-08-31 12:21:08 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 12:21:08 | D | - error = [ 3241.0853, 3239.8508, 3239.3135, 3238.0256, 3237.6224, 3236.9788, 3236.0100, 3235.3422, 3234.7853, 3234.3304] +25-08-31 12:21:08 | D | - best error = [ 3241.0853, 3239.8508, 3239.3135, 3238.0256, 3237.6224, 3236.9788, 3236.0100, 3235.3422, 3234.7853, 3234.3304] +25-08-31 12:21:12 | D | - iter = [ 30, 31] +25-08-31 12:21:12 | D | - error = [ 3233.9845, 3236.1211] +25-08-31 12:21:12 | D | - best error = [ 3233.9845, 3233.9845] +25-08-31 12:21:12 | D | + Adding low-rank branches to single_transformer_blocks.31.proj_mlp +25-08-31 12:21:12 | D | - Calibrating low-rank branch for single_transformer_blocks.31.proj_out.linears.1 +25-08-31 12:21:12 | D | + w: sfp4_e2m1_all +25-08-31 12:21:12 | D | + x: sfp4_e2m1_all +25-08-31 12:21:12 | D | + y: None +25-08-31 12:21:12 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:21:12 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:21:12 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:21:17 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:21:49 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:21:49 | D | - error = [ 4698.1073, 4665.5351, 4642.8160, 4627.1274, 4618.0106, 4608.7823, 4597.7849, 4594.6210, 4589.9858, 4582.8491] +25-08-31 12:21:49 | D | - best error = [ 4698.1073, 4665.5351, 4642.8160, 4627.1274, 4618.0106, 4608.7823, 4597.7849, 4594.6210, 4589.9858, 4582.8491] +25-08-31 12:22:02 | D | - iter = [ 10, 11, 12, 13] +25-08-31 12:22:02 | D | - error = [ 4571.0003, 4564.8038, 4560.2009, 4563.9198] +25-08-31 12:22:02 | D | - best error = [ 4571.0003, 4564.8038, 4560.2009, 4560.2009] +25-08-31 12:22:02 | D | + Adding low-rank branches to single_transformer_blocks.31.proj_out.linears.1 +25-08-31 12:22:20 | D | - Calibrating low-rank branches of block single_transformer_blocks.32 +25-08-31 12:22:20 | D | - Calibrating low-rank branch for single_transformer_blocks.32.attn.to_q, single_transformer_blocks.32.attn.to_k, single_transformer_blocks.32.attn.to_v +25-08-31 12:22:20 | D | + w: sfp4_e2m1_all +25-08-31 12:22:20 | D | + x: sfp4_e2m1_all +25-08-31 12:22:20 | D | + y: None +25-08-31 12:22:20 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:22:20 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:22:20 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:22:22 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:22:39 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 12:22:39 | D | - error = [68507.8598, 66947.6634, 66094.3713, 65856.5675, 65524.2079, 65116.3085, 64830.5271, 65144.8994] +25-08-31 12:22:39 | D | - best error = [68507.8598, 66947.6634, 66094.3713, 65856.5675, 65524.2079, 65116.3085, 64830.5271, 64830.5271] +25-08-31 12:22:39 | D | + Adding low-rank branches to single_transformer_blocks.32.attn.to_q, single_transformer_blocks.32.attn.to_k, single_transformer_blocks.32.attn.to_v +25-08-31 12:22:39 | D | - Calibrating low-rank branch for single_transformer_blocks.32.proj_out.linears.0 +25-08-31 12:22:39 | D | + w: sfp4_e2m1_all +25-08-31 12:22:39 | D | + x: sfp4_e2m1_all +25-08-31 12:22:39 | D | + y: None +25-08-31 12:22:39 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:22:39 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:22:39 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:22:41 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:22:56 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:22:56 | D | - error = [ 3458.2928, 3413.3687, 3393.3058, 3364.8392, 3354.9884, 3348.4820, 3344.9170, 3340.0410, 3329.6992, 3321.6070] +25-08-31 12:22:56 | D | - best error = [ 3458.2928, 3413.3687, 3393.3058, 3364.8392, 3354.9884, 3348.4820, 3344.9170, 3340.0410, 3329.6992, 3321.6070] +25-08-31 12:23:01 | D | - iter = [ 10, 11, 12, 13] +25-08-31 12:23:01 | D | - error = [ 3320.7206, 3313.7615, 3312.0617, 3312.9167] +25-08-31 12:23:01 | D | - best error = [ 3320.7206, 3313.7615, 3312.0617, 3312.0617] +25-08-31 12:23:02 | D | + Adding low-rank branches to single_transformer_blocks.32.proj_out.linears.0 +25-08-31 12:23:02 | D | - Calibrating low-rank branch for single_transformer_blocks.32.proj_mlp +25-08-31 12:23:02 | D | + w: sfp4_e2m1_all +25-08-31 12:23:02 | D | + x: sfp4_e2m1_all +25-08-31 12:23:02 | D | + y: None +25-08-31 12:23:02 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:23:02 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:23:02 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:23:04 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:23:25 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:23:25 | D | - error = [ 4011.1407, 3995.1561, 3982.5799, 3973.7457, 3967.8889, 3965.4545, 3959.8045, 3957.5356, 3954.3589, 3950.2504] +25-08-31 12:23:25 | D | - best error = [ 4011.1407, 3995.1561, 3982.5799, 3973.7457, 3967.8889, 3965.4545, 3959.8045, 3957.5356, 3954.3589, 3950.2504] +25-08-31 12:23:47 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 12:23:47 | D | - error = [ 3949.0144, 3945.1496, 3942.0332, 3939.7769, 3937.2573, 3934.3988, 3933.7361, 3932.7990, 3930.9134, 3929.1701] +25-08-31 12:23:47 | D | - best error = [ 3949.0144, 3945.1496, 3942.0332, 3939.7769, 3937.2573, 3934.3988, 3933.7361, 3932.7990, 3930.9134, 3929.1701] +25-08-31 12:24:08 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-31 12:24:08 | D | - error = [ 3928.2984, 3928.0895, 3927.8994, 3927.4438, 3927.0011, 3926.3469, 3925.2972, 3924.7739, 3924.6436, 3924.1557] +25-08-31 12:24:08 | D | - best error = [ 3928.2984, 3928.0895, 3927.8994, 3927.4438, 3927.0011, 3926.3469, 3925.2972, 3924.7739, 3924.6436, 3924.1557] +25-08-31 12:24:12 | D | - iter = [ 30, 31] +25-08-31 12:24:12 | D | - error = [ 3924.0892, 3924.1310] +25-08-31 12:24:12 | D | - best error = [ 3924.0892, 3924.0892] +25-08-31 12:24:13 | D | + Adding low-rank branches to single_transformer_blocks.32.proj_mlp +25-08-31 12:24:13 | D | - Calibrating low-rank branch for single_transformer_blocks.32.proj_out.linears.1 +25-08-31 12:24:13 | D | + w: sfp4_e2m1_all +25-08-31 12:24:13 | D | + x: sfp4_e2m1_all +25-08-31 12:24:13 | D | + y: None +25-08-31 12:24:13 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:24:13 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:24:13 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:24:18 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:24:49 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:24:49 | D | - error = [ 1715.1824, 1706.6812, 1700.6925, 1694.9419, 1689.9034, 1686.7358, 1682.8534, 1681.4184, 1680.8793, 1679.4186] +25-08-31 12:24:49 | D | - best error = [ 1715.1824, 1706.6812, 1700.6925, 1694.9419, 1689.9034, 1686.7358, 1682.8534, 1681.4184, 1680.8793, 1679.4186] +25-08-31 12:25:21 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 12:25:21 | D | - error = [ 1677.4711, 1676.7202, 1676.5559, 1676.0195, 1675.9418, 1674.6021, 1674.2113, 1673.3602, 1672.7872, 1673.2092] +25-08-31 12:25:21 | D | - best error = [ 1677.4711, 1676.7202, 1676.5559, 1676.0195, 1675.9418, 1674.6021, 1674.2113, 1673.3602, 1672.7872, 1672.7872] +25-08-31 12:25:21 | D | + Adding low-rank branches to single_transformer_blocks.32.proj_out.linears.1 +25-08-31 12:25:39 | D | - Calibrating low-rank branches of block single_transformer_blocks.33 +25-08-31 12:25:39 | D | - Calibrating low-rank branch for single_transformer_blocks.33.attn.to_q, single_transformer_blocks.33.attn.to_k, single_transformer_blocks.33.attn.to_v +25-08-31 12:25:39 | D | + w: sfp4_e2m1_all +25-08-31 12:25:39 | D | + x: sfp4_e2m1_all +25-08-31 12:25:39 | D | + y: None +25-08-31 12:25:39 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:25:39 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:25:39 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:25:40 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:25:49 | D | - iter = [ 0, 1, 2, 3] +25-08-31 12:25:49 | D | - error = [64751.1599, 63582.7478, 63143.7502, 63212.1710] +25-08-31 12:25:49 | D | - best error = [64751.1599, 63582.7478, 63143.7502, 63143.7502] +25-08-31 12:25:49 | D | + Adding low-rank branches to single_transformer_blocks.33.attn.to_q, single_transformer_blocks.33.attn.to_k, single_transformer_blocks.33.attn.to_v +25-08-31 12:25:49 | D | - Calibrating low-rank branch for single_transformer_blocks.33.proj_out.linears.0 +25-08-31 12:25:49 | D | + w: sfp4_e2m1_all +25-08-31 12:25:49 | D | + x: sfp4_e2m1_all +25-08-31 12:25:49 | D | + y: None +25-08-31 12:25:49 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:25:49 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:25:49 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:25:51 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:26:02 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-31 12:26:02 | D | - error = [ 1721.8277, 1699.2662, 1688.4180, 1684.5795, 1679.8190, 1674.0267, 1670.5291, 1673.6227] +25-08-31 12:26:02 | D | - best error = [ 1721.8277, 1699.2662, 1688.4180, 1684.5795, 1679.8190, 1674.0267, 1670.5291, 1670.5291] +25-08-31 12:26:03 | D | + Adding low-rank branches to single_transformer_blocks.33.proj_out.linears.0 +25-08-31 12:26:03 | D | - Calibrating low-rank branch for single_transformer_blocks.33.proj_mlp +25-08-31 12:26:03 | D | + w: sfp4_e2m1_all +25-08-31 12:26:03 | D | + x: sfp4_e2m1_all +25-08-31 12:26:03 | D | + y: None +25-08-31 12:26:03 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:26:03 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:26:03 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:26:05 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:26:18 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-31 12:26:18 | D | - error = [ 4062.0097, 4040.0687, 4027.3905, 4017.4470, 4007.9089, 4013.6985] +25-08-31 12:26:18 | D | - best error = [ 4062.0097, 4040.0687, 4027.3905, 4017.4470, 4007.9089, 4007.9089] +25-08-31 12:26:18 | D | + Adding low-rank branches to single_transformer_blocks.33.proj_mlp +25-08-31 12:26:18 | D | - Calibrating low-rank branch for single_transformer_blocks.33.proj_out.linears.1 +25-08-31 12:26:18 | D | + w: sfp4_e2m1_all +25-08-31 12:26:18 | D | + x: sfp4_e2m1_all +25-08-31 12:26:18 | D | + y: None +25-08-31 12:26:18 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:26:18 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:26:18 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:26:23 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:26:53 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:26:53 | D | - error = [ 4331.0890, 4301.8554, 4282.6135, 4266.7351, 4253.6201, 4238.8690, 4230.8623, 4222.3576, 4217.8189, 4217.7240] +25-08-31 12:26:53 | D | - best error = [ 4331.0890, 4301.8554, 4282.6135, 4266.7351, 4253.6201, 4238.8690, 4230.8623, 4222.3576, 4217.8189, 4217.7240] +25-08-31 12:27:26 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 12:27:26 | D | - error = [ 4213.6970, 4209.5197, 4203.0917, 4199.9119, 4194.5283, 4193.4907, 4191.5539, 4188.1545, 4182.5676, 4183.2648] +25-08-31 12:27:26 | D | - best error = [ 4213.6970, 4209.5197, 4203.0917, 4199.9119, 4194.5283, 4193.4907, 4191.5539, 4188.1545, 4182.5676, 4182.5676] +25-08-31 12:27:26 | D | + Adding low-rank branches to single_transformer_blocks.33.proj_out.linears.1 +25-08-31 12:27:44 | D | - Calibrating low-rank branches of block single_transformer_blocks.34 +25-08-31 12:27:44 | D | - Calibrating low-rank branch for single_transformer_blocks.34.attn.to_q, single_transformer_blocks.34.attn.to_k, single_transformer_blocks.34.attn.to_v +25-08-31 12:27:44 | D | + w: sfp4_e2m1_all +25-08-31 12:27:44 | D | + x: sfp4_e2m1_all +25-08-31 12:27:44 | D | + y: None +25-08-31 12:27:44 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:27:44 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:27:44 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:27:45 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:28:07 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:28:07 | D | - error = [84138.0260, 82157.6210, 81769.2989, 81562.8527, 81231.2641, 81148.4100, 80839.4993, 80546.5586, 80034.1495, 80842.7388] +25-08-31 12:28:07 | D | - best error = [84138.0260, 82157.6210, 81769.2989, 81562.8527, 81231.2641, 81148.4100, 80839.4993, 80546.5586, 80034.1495, 80034.1495] +25-08-31 12:28:07 | D | + Adding low-rank branches to single_transformer_blocks.34.attn.to_q, single_transformer_blocks.34.attn.to_k, single_transformer_blocks.34.attn.to_v +25-08-31 12:28:07 | D | - Calibrating low-rank branch for single_transformer_blocks.34.proj_out.linears.0 +25-08-31 12:28:07 | D | + w: sfp4_e2m1_all +25-08-31 12:28:07 | D | + x: sfp4_e2m1_all +25-08-31 12:28:07 | D | + y: None +25-08-31 12:28:07 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:28:07 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:28:07 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:28:09 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:28:22 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-31 12:28:22 | D | - error = [ 2180.7634, 2152.7894, 2128.3094, 2111.6272, 2097.0936, 2095.2435, 2087.7325, 2078.9574, 2079.6720] +25-08-31 12:28:22 | D | - best error = [ 2180.7634, 2152.7894, 2128.3094, 2111.6272, 2097.0936, 2095.2435, 2087.7325, 2078.9574, 2078.9574] +25-08-31 12:28:22 | D | + Adding low-rank branches to single_transformer_blocks.34.proj_out.linears.0 +25-08-31 12:28:22 | D | - Calibrating low-rank branch for single_transformer_blocks.34.proj_mlp +25-08-31 12:28:22 | D | + w: sfp4_e2m1_all +25-08-31 12:28:23 | D | + x: sfp4_e2m1_all +25-08-31 12:28:23 | D | + y: None +25-08-31 12:28:23 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:28:23 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:28:23 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:28:24 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:28:45 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:28:45 | D | - error = [ 4431.8556, 4406.3663, 4391.9281, 4379.3445, 4370.4789, 4361.7735, 4355.7826, 4349.6342, 4346.9060, 4342.6202] +25-08-31 12:28:45 | D | - best error = [ 4431.8556, 4406.3663, 4391.9281, 4379.3445, 4370.4789, 4361.7735, 4355.7826, 4349.6342, 4346.9060, 4342.6202] +25-08-31 12:29:07 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 12:29:07 | D | - error = [ 4339.2944, 4336.6785, 4334.9768, 4333.4649, 4330.7798, 4329.5325, 4328.1063, 4325.9810, 4324.1554, 4322.3160] +25-08-31 12:29:07 | D | - best error = [ 4339.2944, 4336.6785, 4334.9768, 4333.4649, 4330.7798, 4329.5325, 4328.1063, 4325.9810, 4324.1554, 4322.3160] +25-08-31 12:29:16 | D | - iter = [ 20, 21, 22, 23] +25-08-31 12:29:16 | D | - error = [ 4321.2889, 4319.9032, 4318.7954, 4318.7966] +25-08-31 12:29:16 | D | - best error = [ 4321.2889, 4319.9032, 4318.7954, 4318.7954] +25-08-31 12:29:16 | D | + Adding low-rank branches to single_transformer_blocks.34.proj_mlp +25-08-31 12:29:16 | D | - Calibrating low-rank branch for single_transformer_blocks.34.proj_out.linears.1 +25-08-31 12:29:16 | D | + w: sfp4_e2m1_all +25-08-31 12:29:16 | D | + x: sfp4_e2m1_all +25-08-31 12:29:16 | D | + y: None +25-08-31 12:29:16 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:29:16 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:29:16 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:29:21 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:29:35 | D | - iter = [ 0, 1, 2, 3] +25-08-31 12:29:35 | D | - error = [ 2026.4631, 2014.2449, 2004.1072, 2004.6295] +25-08-31 12:29:35 | D | - best error = [ 2026.4631, 2014.2449, 2004.1072, 2004.1072] +25-08-31 12:29:35 | D | + Adding low-rank branches to single_transformer_blocks.34.proj_out.linears.1 +25-08-31 12:29:53 | D | - Calibrating low-rank branches of block single_transformer_blocks.35 +25-08-31 12:29:53 | D | - Calibrating low-rank branch for single_transformer_blocks.35.attn.to_q, single_transformer_blocks.35.attn.to_k, single_transformer_blocks.35.attn.to_v +25-08-31 12:29:53 | D | + w: sfp4_e2m1_all +25-08-31 12:29:53 | D | + x: sfp4_e2m1_all +25-08-31 12:29:53 | D | + y: None +25-08-31 12:29:53 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:29:53 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:29:53 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:29:54 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:30:02 | D | - iter = [ 0, 1, 2, 3] +25-08-31 12:30:02 | D | - error = [86695.2260, 85685.6215, 84436.9618, 84773.5264] +25-08-31 12:30:02 | D | - best error = [86695.2260, 85685.6215, 84436.9618, 84436.9618] +25-08-31 12:30:03 | D | + Adding low-rank branches to single_transformer_blocks.35.attn.to_q, single_transformer_blocks.35.attn.to_k, single_transformer_blocks.35.attn.to_v +25-08-31 12:30:03 | D | - Calibrating low-rank branch for single_transformer_blocks.35.proj_out.linears.0 +25-08-31 12:30:03 | D | + w: sfp4_e2m1_all +25-08-31 12:30:03 | D | + x: sfp4_e2m1_all +25-08-31 12:30:03 | D | + y: None +25-08-31 12:30:03 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:30:03 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:30:03 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:30:04 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:30:19 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:30:19 | D | - error = [ 2260.4904, 2232.4605, 2211.1149, 2204.3729, 2199.3466, 2195.4472, 2191.4413, 2184.1451, 2174.4606, 2186.3172] +25-08-31 12:30:19 | D | - best error = [ 2260.4904, 2232.4605, 2211.1149, 2204.3729, 2199.3466, 2195.4472, 2191.4413, 2184.1451, 2174.4606, 2174.4606] +25-08-31 12:30:19 | D | + Adding low-rank branches to single_transformer_blocks.35.proj_out.linears.0 +25-08-31 12:30:19 | D | - Calibrating low-rank branch for single_transformer_blocks.35.proj_mlp +25-08-31 12:30:19 | D | + w: sfp4_e2m1_all +25-08-31 12:30:19 | D | + x: sfp4_e2m1_all +25-08-31 12:30:19 | D | + y: None +25-08-31 12:30:19 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:30:19 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:30:19 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:30:20 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:30:41 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:30:41 | D | - error = [ 3883.2938, 3862.9344, 3848.6629, 3837.8237, 3830.9695, 3826.7686, 3822.2249, 3817.0639, 3813.4505, 3809.5127] +25-08-31 12:30:41 | D | - best error = [ 3883.2938, 3862.9344, 3848.6629, 3837.8237, 3830.9695, 3826.7686, 3822.2249, 3817.0639, 3813.4505, 3809.5127] +25-08-31 12:30:53 | D | - iter = [ 10, 11, 12, 13, 14, 15] +25-08-31 12:30:53 | D | - error = [ 3807.4155, 3806.0160, 3804.5900, 3803.9984, 3801.0748, 3801.1926] +25-08-31 12:30:53 | D | - best error = [ 3807.4155, 3806.0160, 3804.5900, 3803.9984, 3801.0748, 3801.0748] +25-08-31 12:30:54 | D | + Adding low-rank branches to single_transformer_blocks.35.proj_mlp +25-08-31 12:30:54 | D | - Calibrating low-rank branch for single_transformer_blocks.35.proj_out.linears.1 +25-08-31 12:30:54 | D | + w: sfp4_e2m1_all +25-08-31 12:30:54 | D | + x: sfp4_e2m1_all +25-08-31 12:30:54 | D | + y: None +25-08-31 12:30:54 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:30:54 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:30:54 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:30:59 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:31:30 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:31:30 | D | - error = [ 4231.1670, 4195.6375, 4171.8552, 4150.5255, 4148.4127, 4137.9901, 4131.5670, 4125.1446, 4115.4783, 4109.2965] +25-08-31 12:31:30 | D | - best error = [ 4231.1670, 4195.6375, 4171.8552, 4150.5255, 4148.4127, 4137.9901, 4131.5670, 4125.1446, 4115.4783, 4109.2965] +25-08-31 12:31:37 | D | - iter = [ 10, 11] +25-08-31 12:31:37 | D | - error = [ 4098.0505, 4098.9547] +25-08-31 12:31:37 | D | - best error = [ 4098.0505, 4098.0505] +25-08-31 12:31:37 | D | + Adding low-rank branches to single_transformer_blocks.35.proj_out.linears.1 +25-08-31 12:31:55 | D | - Calibrating low-rank branches of block single_transformer_blocks.36 +25-08-31 12:31:55 | D | - Calibrating low-rank branch for single_transformer_blocks.36.attn.to_q, single_transformer_blocks.36.attn.to_k, single_transformer_blocks.36.attn.to_v +25-08-31 12:31:55 | D | + w: sfp4_e2m1_all +25-08-31 12:31:55 | D | + x: sfp4_e2m1_all +25-08-31 12:31:55 | D | + y: None +25-08-31 12:31:55 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:31:55 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:31:55 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:31:56 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:32:12 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-31 12:32:12 | D | - error = [35105.1103, 34550.6302, 34031.0804, 33976.4982, 33655.1928, 33610.4936, 33818.8605] +25-08-31 12:32:12 | D | - best error = [35105.1103, 34550.6302, 34031.0804, 33976.4982, 33655.1928, 33610.4936, 33610.4936] +25-08-31 12:32:12 | D | + Adding low-rank branches to single_transformer_blocks.36.attn.to_q, single_transformer_blocks.36.attn.to_k, single_transformer_blocks.36.attn.to_v +25-08-31 12:32:12 | D | - Calibrating low-rank branch for single_transformer_blocks.36.proj_out.linears.0 +25-08-31 12:32:12 | D | + w: sfp4_e2m1_all +25-08-31 12:32:12 | D | + x: sfp4_e2m1_all +25-08-31 12:32:12 | D | + y: None +25-08-31 12:32:12 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:32:12 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:32:12 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:32:13 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:32:19 | D | - iter = [ 0, 1, 2, 3] +25-08-31 12:32:19 | D | - error = [ 3662.1596, 3604.5355, 3566.4170, 3601.5234] +25-08-31 12:32:19 | D | - best error = [ 3662.1596, 3604.5355, 3566.4170, 3566.4170] +25-08-31 12:32:19 | D | + Adding low-rank branches to single_transformer_blocks.36.proj_out.linears.0 +25-08-31 12:32:20 | D | - Calibrating low-rank branch for single_transformer_blocks.36.proj_mlp +25-08-31 12:32:20 | D | + w: sfp4_e2m1_all +25-08-31 12:32:20 | D | + x: sfp4_e2m1_all +25-08-31 12:32:20 | D | + y: None +25-08-31 12:32:20 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:32:20 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:32:20 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:32:21 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:32:42 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:32:42 | D | - error = [ 4151.7690, 4130.0131, 4116.1069, 4107.3081, 4099.9329, 4095.0695, 4089.8232, 4084.1535, 4080.7529, 4077.1026] +25-08-31 12:32:42 | D | - best error = [ 4151.7690, 4130.0131, 4116.1069, 4107.3081, 4099.9329, 4095.0695, 4089.8232, 4084.1535, 4080.7529, 4077.1026] +25-08-31 12:33:03 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-31 12:33:03 | D | - error = [ 4074.3925, 4070.7460, 4068.3933, 4066.5505, 4065.3159, 4063.5483, 4062.1445, 4060.2973, 4059.3847, 4059.3129] +25-08-31 12:33:03 | D | - best error = [ 4074.3925, 4070.7460, 4068.3933, 4066.5505, 4065.3159, 4063.5483, 4062.1445, 4060.2973, 4059.3847, 4059.3129] +25-08-31 12:33:07 | D | - iter = [ 20, 21] +25-08-31 12:33:07 | D | - error = [ 4057.7750, 4066.3343] +25-08-31 12:33:07 | D | - best error = [ 4057.7750, 4057.7750] +25-08-31 12:33:07 | D | + Adding low-rank branches to single_transformer_blocks.36.proj_mlp +25-08-31 12:33:07 | D | - Calibrating low-rank branch for single_transformer_blocks.36.proj_out.linears.1 +25-08-31 12:33:07 | D | + w: sfp4_e2m1_all +25-08-31 12:33:07 | D | + x: sfp4_e2m1_all +25-08-31 12:33:07 | D | + y: None +25-08-31 12:33:07 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:33:07 | D | + finished parsing calibration arguments, ram usage: 14.5 +25-08-31 12:33:07 | D | + finished resetting calibrator, ram usage: 14.5 +25-08-31 12:33:12 | D | + finished calculating the original outputs, ram usage: 14.5 +25-08-31 12:33:44 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:33:44 | D | - error = [ 5157.9021, 5123.9241, 5094.5508, 5065.5691, 5050.1327, 5030.6781, 5016.6236, 5014.9520, 5010.9193, 4998.7262] +25-08-31 12:33:44 | D | - best error = [ 5157.9021, 5123.9241, 5094.5508, 5065.5691, 5050.1327, 5030.6781, 5016.6236, 5014.9520, 5010.9193, 4998.7262] +25-08-31 12:34:02 | D | - iter = [ 10, 11, 12, 13, 14, 15] +25-08-31 12:34:02 | D | - error = [ 4992.9218, 4992.6370, 4989.9944, 4988.0794, 4983.4079, 4984.5423] +25-08-31 12:34:02 | D | - best error = [ 4992.9218, 4992.6370, 4989.9944, 4988.0794, 4983.4079, 4983.4079] +25-08-31 12:34:03 | D | + Adding low-rank branches to single_transformer_blocks.36.proj_out.linears.1 +25-08-31 12:34:20 | D | - Calibrating low-rank branches of block single_transformer_blocks.37 +25-08-31 12:34:20 | D | - Calibrating low-rank branch for single_transformer_blocks.37.attn.to_q, single_transformer_blocks.37.attn.to_k, single_transformer_blocks.37.attn.to_v +25-08-31 12:34:20 | D | + w: sfp4_e2m1_all +25-08-31 12:34:20 | D | + x: sfp4_e2m1_all +25-08-31 12:34:20 | D | + y: None +25-08-31 12:34:20 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:34:20 | D | + finished parsing calibration arguments, ram usage: 14.2 +25-08-31 12:34:20 | D | + finished resetting calibrator, ram usage: 14.2 +25-08-31 12:34:22 | D | + finished calculating the original outputs, ram usage: 14.2 +25-08-31 12:34:28 | D | - iter = [ 0, 1, 2] +25-08-31 12:34:28 | D | - error = [ 9493.3981, 9264.1740, 9732.4985] +25-08-31 12:34:28 | D | - best error = [ 9493.3981, 9264.1740, 9264.1740] +25-08-31 12:34:29 | D | + Adding low-rank branches to single_transformer_blocks.37.attn.to_q, single_transformer_blocks.37.attn.to_k, single_transformer_blocks.37.attn.to_v +25-08-31 12:34:29 | D | - Calibrating low-rank branch for single_transformer_blocks.37.proj_out.linears.0 +25-08-31 12:34:29 | D | + w: sfp4_e2m1_all +25-08-31 12:34:29 | D | + x: sfp4_e2m1_all +25-08-31 12:34:29 | D | + y: None +25-08-31 12:34:29 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:34:29 | D | + finished parsing calibration arguments, ram usage: 14.2 +25-08-31 12:34:29 | D | + finished resetting calibrator, ram usage: 14.2 +25-08-31 12:34:30 | D | + finished calculating the original outputs, ram usage: 14.2 +25-08-31 12:34:37 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-31 12:34:37 | D | - error = [ 1322.1185, 1300.4793, 1284.9887, 1278.1374, 1287.9975] +25-08-31 12:34:37 | D | - best error = [ 1322.1185, 1300.4793, 1284.9887, 1278.1374, 1278.1374] +25-08-31 12:34:37 | D | + Adding low-rank branches to single_transformer_blocks.37.proj_out.linears.0 +25-08-31 12:34:37 | D | - Calibrating low-rank branch for single_transformer_blocks.37.proj_mlp +25-08-31 12:34:37 | D | + w: sfp4_e2m1_all +25-08-31 12:34:37 | D | + x: sfp4_e2m1_all +25-08-31 12:34:37 | D | + y: None +25-08-31 12:34:37 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:34:37 | D | + finished parsing calibration arguments, ram usage: 14.2 +25-08-31 12:34:37 | D | + finished resetting calibrator, ram usage: 14.2 +25-08-31 12:34:39 | D | + finished calculating the original outputs, ram usage: 14.2 +25-08-31 12:35:00 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:35:00 | D | - error = [ 3282.6032, 3267.6808, 3259.2789, 3253.1560, 3249.3561, 3245.3712, 3241.3721, 3237.2077, 3236.0170, 3232.9850] +25-08-31 12:35:00 | D | - best error = [ 3282.6032, 3267.6808, 3259.2789, 3253.1560, 3249.3561, 3245.3712, 3241.3721, 3237.2077, 3236.0170, 3232.9850] +25-08-31 12:35:12 | D | - iter = [ 10, 11, 12, 13, 14, 15] +25-08-31 12:35:12 | D | - error = [ 3230.7895, 3230.0706, 3228.1654, 3227.0119, 3224.6789, 3225.7799] +25-08-31 12:35:12 | D | - best error = [ 3230.7895, 3230.0706, 3228.1654, 3227.0119, 3224.6789, 3224.6789] +25-08-31 12:35:13 | D | + Adding low-rank branches to single_transformer_blocks.37.proj_mlp +25-08-31 12:35:13 | D | - Calibrating low-rank branch for single_transformer_blocks.37.proj_out.linears.1 +25-08-31 12:35:13 | D | + w: sfp4_e2m1_all +25-08-31 12:35:13 | D | + x: sfp4_e2m1_all +25-08-31 12:35:13 | D | + y: None +25-08-31 12:35:13 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-31 12:35:13 | D | + finished parsing calibration arguments, ram usage: 14.2 +25-08-31 12:35:13 | D | + finished resetting calibrator, ram usage: 14.2 +25-08-31 12:35:17 | D | + finished calculating the original outputs, ram usage: 14.2 +25-08-31 12:35:49 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-31 12:35:49 | D | - error = [ 2377.6007, 2346.6469, 2324.7186, 2308.0326, 2296.7320, 2290.2091, 2285.3348, 2276.8674, 2270.0577, 2264.1956] +25-08-31 12:35:49 | D | - best error = [ 2377.6007, 2346.6469, 2324.7186, 2308.0326, 2296.7320, 2290.2091, 2285.3348, 2276.8674, 2270.0577, 2264.1956] +25-08-31 12:36:18 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18] +25-08-31 12:36:18 | D | - error = [ 2261.2503, 2259.6218, 2258.6209, 2256.2520, 2254.6184, 2251.1556, 2249.9677, 2244.4326, 2256.7111] +25-08-31 12:36:18 | D | - best error = [ 2261.2503, 2259.6218, 2258.6209, 2256.2520, 2254.6184, 2251.1556, 2249.9677, 2244.4326, 2244.4326] +25-08-31 12:36:19 | D | + Adding low-rank branches to single_transformer_blocks.37.proj_out.linears.1 +25-08-31 12:36:20 | D | - Calibrating weights: block transformer_blocks.0 +25-08-31 12:36:20 | D | - Calibrating transformer_blocks.0.norm1.linear.weight quantizer +25-08-31 12:36:20 | D | - Calibrating transformer_blocks.0.norm1_context.linear.weight quantizer +25-08-31 12:36:20 | D | - Calibrating transformer_blocks.0.attn.to_q.weight quantizer +25-08-31 12:36:20 | D | - Calibrating transformer_blocks.0.attn.to_k.weight quantizer +25-08-31 12:36:20 | D | - Calibrating transformer_blocks.0.attn.to_v.weight quantizer +25-08-31 12:36:20 | D | - Calibrating transformer_blocks.0.attn.add_q_proj.weight quantizer +25-08-31 12:36:21 | D | - Calibrating transformer_blocks.0.attn.add_k_proj.weight quantizer +25-08-31 12:36:21 | D | - Calibrating transformer_blocks.0.attn.add_v_proj.weight quantizer +25-08-31 12:36:21 | D | - Calibrating transformer_blocks.0.attn.to_out.0.weight quantizer +25-08-31 12:36:21 | D | - Calibrating transformer_blocks.0.attn.to_add_out.weight quantizer +25-08-31 12:36:21 | D | - Calibrating transformer_blocks.0.ff.net.0.proj.weight quantizer +25-08-31 12:36:22 | D | - Calibrating transformer_blocks.0.ff.net.2.weight quantizer +25-08-31 12:36:22 | D | - Calibrating transformer_blocks.0.ff_context.net.0.proj.weight quantizer +25-08-31 12:36:22 | D | - Calibrating transformer_blocks.0.ff_context.net.2.weight quantizer +25-08-31 12:36:22 | D | - Calibrating weights: block transformer_blocks.1 +25-08-31 12:36:22 | D | - Calibrating transformer_blocks.1.norm1.linear.weight quantizer +25-08-31 12:36:22 | D | - Calibrating transformer_blocks.1.norm1_context.linear.weight quantizer +25-08-31 12:36:23 | D | - Calibrating transformer_blocks.1.attn.to_q.weight quantizer +25-08-31 12:36:23 | D | - Calibrating transformer_blocks.1.attn.to_k.weight quantizer +25-08-31 12:36:23 | D | - Calibrating transformer_blocks.1.attn.to_v.weight quantizer +25-08-31 12:36:23 | D | - Calibrating transformer_blocks.1.attn.add_q_proj.weight quantizer +25-08-31 12:36:23 | D | - Calibrating transformer_blocks.1.attn.add_k_proj.weight quantizer +25-08-31 12:36:23 | D | - Calibrating transformer_blocks.1.attn.add_v_proj.weight quantizer +25-08-31 12:36:24 | D | - Calibrating transformer_blocks.1.attn.to_out.0.weight quantizer +25-08-31 12:36:24 | D | - Calibrating transformer_blocks.1.attn.to_add_out.weight quantizer +25-08-31 12:36:24 | D | - Calibrating transformer_blocks.1.ff.net.0.proj.weight quantizer +25-08-31 12:36:24 | D | - Calibrating transformer_blocks.1.ff.net.2.weight quantizer +25-08-31 12:36:24 | D | - Calibrating transformer_blocks.1.ff_context.net.0.proj.weight quantizer +25-08-31 12:36:25 | D | - Calibrating transformer_blocks.1.ff_context.net.2.weight quantizer +25-08-31 12:36:25 | D | - Calibrating weights: block transformer_blocks.2 +25-08-31 12:36:25 | D | - Calibrating transformer_blocks.2.norm1.linear.weight quantizer +25-08-31 12:36:25 | D | - Calibrating transformer_blocks.2.norm1_context.linear.weight quantizer +25-08-31 12:36:25 | D | - Calibrating transformer_blocks.2.attn.to_q.weight quantizer +25-08-31 12:36:25 | D | - Calibrating transformer_blocks.2.attn.to_k.weight quantizer +25-08-31 12:36:26 | D | - Calibrating transformer_blocks.2.attn.to_v.weight quantizer +25-08-31 12:36:26 | D | - Calibrating transformer_blocks.2.attn.add_q_proj.weight quantizer +25-08-31 12:36:26 | D | - Calibrating transformer_blocks.2.attn.add_k_proj.weight quantizer +25-08-31 12:36:26 | D | - Calibrating transformer_blocks.2.attn.add_v_proj.weight quantizer +25-08-31 12:36:26 | D | - Calibrating transformer_blocks.2.attn.to_out.0.weight quantizer +25-08-31 12:36:26 | D | - Calibrating transformer_blocks.2.attn.to_add_out.weight quantizer +25-08-31 12:36:27 | D | - Calibrating transformer_blocks.2.ff.net.0.proj.weight quantizer +25-08-31 12:36:27 | D | - Calibrating transformer_blocks.2.ff.net.2.weight quantizer +25-08-31 12:36:27 | D | - Calibrating transformer_blocks.2.ff_context.net.0.proj.weight quantizer +25-08-31 12:36:27 | D | - Calibrating transformer_blocks.2.ff_context.net.2.weight quantizer +25-08-31 12:36:27 | D | - Calibrating weights: block transformer_blocks.3 +25-08-31 12:36:27 | D | - Calibrating transformer_blocks.3.norm1.linear.weight quantizer +25-08-31 12:36:28 | D | - Calibrating transformer_blocks.3.norm1_context.linear.weight quantizer +25-08-31 12:36:28 | D | - Calibrating transformer_blocks.3.attn.to_q.weight quantizer +25-08-31 12:36:28 | D | - Calibrating transformer_blocks.3.attn.to_k.weight quantizer +25-08-31 12:36:28 | D | - Calibrating transformer_blocks.3.attn.to_v.weight quantizer +25-08-31 12:36:28 | D | - Calibrating transformer_blocks.3.attn.add_q_proj.weight quantizer +25-08-31 12:36:29 | D | - Calibrating transformer_blocks.3.attn.add_k_proj.weight quantizer +25-08-31 12:36:29 | D | - Calibrating transformer_blocks.3.attn.add_v_proj.weight quantizer +25-08-31 12:36:29 | D | - Calibrating transformer_blocks.3.attn.to_out.0.weight quantizer +25-08-31 12:36:29 | D | - Calibrating transformer_blocks.3.attn.to_add_out.weight quantizer +25-08-31 12:36:29 | D | - Calibrating transformer_blocks.3.ff.net.0.proj.weight quantizer +25-08-31 12:36:29 | D | - Calibrating transformer_blocks.3.ff.net.2.weight quantizer +25-08-31 12:36:30 | D | - Calibrating transformer_blocks.3.ff_context.net.0.proj.weight quantizer +25-08-31 12:36:30 | D | - Calibrating transformer_blocks.3.ff_context.net.2.weight quantizer +25-08-31 12:36:30 | D | - Calibrating weights: block transformer_blocks.4 +25-08-31 12:36:30 | D | - Calibrating transformer_blocks.4.norm1.linear.weight quantizer +25-08-31 12:36:30 | D | - Calibrating transformer_blocks.4.norm1_context.linear.weight quantizer +25-08-31 12:36:30 | D | - Calibrating transformer_blocks.4.attn.to_q.weight quantizer +25-08-31 12:36:31 | D | - Calibrating transformer_blocks.4.attn.to_k.weight quantizer +25-08-31 12:36:31 | D | - Calibrating transformer_blocks.4.attn.to_v.weight quantizer +25-08-31 12:36:31 | D | - Calibrating transformer_blocks.4.attn.add_q_proj.weight quantizer +25-08-31 12:36:31 | D | - Calibrating transformer_blocks.4.attn.add_k_proj.weight quantizer +25-08-31 12:36:31 | D | - Calibrating transformer_blocks.4.attn.add_v_proj.weight quantizer +25-08-31 12:36:32 | D | - Calibrating transformer_blocks.4.attn.to_out.0.weight quantizer +25-08-31 12:36:32 | D | - Calibrating transformer_blocks.4.attn.to_add_out.weight quantizer +25-08-31 12:36:32 | D | - Calibrating transformer_blocks.4.ff.net.0.proj.weight quantizer +25-08-31 12:36:32 | D | - Calibrating transformer_blocks.4.ff.net.2.weight quantizer +25-08-31 12:36:32 | D | - Calibrating transformer_blocks.4.ff_context.net.0.proj.weight quantizer +25-08-31 12:36:32 | D | - Calibrating transformer_blocks.4.ff_context.net.2.weight quantizer +25-08-31 12:36:33 | D | - Calibrating weights: block transformer_blocks.5 +25-08-31 12:36:33 | D | - Calibrating transformer_blocks.5.norm1.linear.weight quantizer +25-08-31 12:36:33 | D | - Calibrating transformer_blocks.5.norm1_context.linear.weight quantizer +25-08-31 12:36:33 | D | - Calibrating transformer_blocks.5.attn.to_q.weight quantizer +25-08-31 12:36:33 | D | - Calibrating transformer_blocks.5.attn.to_k.weight quantizer +25-08-31 12:36:33 | D | - Calibrating transformer_blocks.5.attn.to_v.weight quantizer +25-08-31 12:36:34 | D | - Calibrating transformer_blocks.5.attn.add_q_proj.weight quantizer +25-08-31 12:36:34 | D | - Calibrating transformer_blocks.5.attn.add_k_proj.weight quantizer +25-08-31 12:36:34 | D | - Calibrating transformer_blocks.5.attn.add_v_proj.weight quantizer +25-08-31 12:36:34 | D | - Calibrating transformer_blocks.5.attn.to_out.0.weight quantizer +25-08-31 12:36:34 | D | - Calibrating transformer_blocks.5.attn.to_add_out.weight quantizer +25-08-31 12:36:35 | D | - Calibrating transformer_blocks.5.ff.net.0.proj.weight quantizer +25-08-31 12:36:35 | D | - Calibrating transformer_blocks.5.ff.net.2.weight quantizer +25-08-31 12:36:35 | D | - Calibrating transformer_blocks.5.ff_context.net.0.proj.weight quantizer +25-08-31 12:36:35 | D | - Calibrating transformer_blocks.5.ff_context.net.2.weight quantizer +25-08-31 12:36:35 | D | - Calibrating weights: block transformer_blocks.6 +25-08-31 12:36:35 | D | - Calibrating transformer_blocks.6.norm1.linear.weight quantizer +25-08-31 12:36:35 | D | - Calibrating transformer_blocks.6.norm1_context.linear.weight quantizer +25-08-31 12:36:36 | D | - Calibrating transformer_blocks.6.attn.to_q.weight quantizer +25-08-31 12:36:36 | D | - Calibrating transformer_blocks.6.attn.to_k.weight quantizer +25-08-31 12:36:36 | D | - Calibrating transformer_blocks.6.attn.to_v.weight quantizer +25-08-31 12:36:36 | D | - Calibrating transformer_blocks.6.attn.add_q_proj.weight quantizer +25-08-31 12:36:36 | D | - Calibrating transformer_blocks.6.attn.add_k_proj.weight quantizer +25-08-31 12:36:37 | D | - Calibrating transformer_blocks.6.attn.add_v_proj.weight quantizer +25-08-31 12:36:37 | D | - Calibrating transformer_blocks.6.attn.to_out.0.weight quantizer +25-08-31 12:36:37 | D | - Calibrating transformer_blocks.6.attn.to_add_out.weight quantizer +25-08-31 12:36:37 | D | - Calibrating transformer_blocks.6.ff.net.0.proj.weight quantizer +25-08-31 12:36:37 | D | - Calibrating transformer_blocks.6.ff.net.2.weight quantizer +25-08-31 12:36:37 | D | - Calibrating transformer_blocks.6.ff_context.net.0.proj.weight quantizer +25-08-31 12:36:38 | D | - Calibrating transformer_blocks.6.ff_context.net.2.weight quantizer +25-08-31 12:36:38 | D | - Calibrating weights: block transformer_blocks.7 +25-08-31 12:36:38 | D | - Calibrating transformer_blocks.7.norm1.linear.weight quantizer +25-08-31 12:36:38 | D | - Calibrating transformer_blocks.7.norm1_context.linear.weight quantizer +25-08-31 12:36:38 | D | - Calibrating transformer_blocks.7.attn.to_q.weight quantizer +25-08-31 12:36:38 | D | - Calibrating transformer_blocks.7.attn.to_k.weight quantizer +25-08-31 12:36:39 | D | - Calibrating transformer_blocks.7.attn.to_v.weight quantizer +25-08-31 12:36:39 | D | - Calibrating transformer_blocks.7.attn.add_q_proj.weight quantizer +25-08-31 12:36:39 | D | - Calibrating transformer_blocks.7.attn.add_k_proj.weight quantizer +25-08-31 12:36:39 | D | - Calibrating transformer_blocks.7.attn.add_v_proj.weight quantizer +25-08-31 12:36:39 | D | - Calibrating transformer_blocks.7.attn.to_out.0.weight quantizer +25-08-31 12:36:40 | D | - Calibrating transformer_blocks.7.attn.to_add_out.weight quantizer +25-08-31 12:36:40 | D | - Calibrating transformer_blocks.7.ff.net.0.proj.weight quantizer +25-08-31 12:36:40 | D | - Calibrating transformer_blocks.7.ff.net.2.weight quantizer +25-08-31 12:36:40 | D | - Calibrating transformer_blocks.7.ff_context.net.0.proj.weight quantizer +25-08-31 12:36:40 | D | - Calibrating transformer_blocks.7.ff_context.net.2.weight quantizer +25-08-31 12:36:40 | D | - Calibrating weights: block transformer_blocks.8 +25-08-31 12:36:40 | D | - Calibrating transformer_blocks.8.norm1.linear.weight quantizer +25-08-31 12:36:41 | D | - Calibrating transformer_blocks.8.norm1_context.linear.weight quantizer +25-08-31 12:36:41 | D | - Calibrating transformer_blocks.8.attn.to_q.weight quantizer +25-08-31 12:36:41 | D | - Calibrating transformer_blocks.8.attn.to_k.weight quantizer +25-08-31 12:36:41 | D | - Calibrating transformer_blocks.8.attn.to_v.weight quantizer +25-08-31 12:36:41 | D | - Calibrating transformer_blocks.8.attn.add_q_proj.weight quantizer +25-08-31 12:36:42 | D | - Calibrating transformer_blocks.8.attn.add_k_proj.weight quantizer +25-08-31 12:36:42 | D | - Calibrating transformer_blocks.8.attn.add_v_proj.weight quantizer +25-08-31 12:36:42 | D | - Calibrating transformer_blocks.8.attn.to_out.0.weight quantizer +25-08-31 12:36:42 | D | - Calibrating transformer_blocks.8.attn.to_add_out.weight quantizer +25-08-31 12:36:42 | D | - Calibrating transformer_blocks.8.ff.net.0.proj.weight quantizer +25-08-31 12:36:43 | D | - Calibrating transformer_blocks.8.ff.net.2.weight quantizer +25-08-31 12:36:43 | D | - Calibrating transformer_blocks.8.ff_context.net.0.proj.weight quantizer +25-08-31 12:36:43 | D | - Calibrating transformer_blocks.8.ff_context.net.2.weight quantizer +25-08-31 12:36:43 | D | - Calibrating weights: block transformer_blocks.9 +25-08-31 12:36:43 | D | - Calibrating transformer_blocks.9.norm1.linear.weight quantizer +25-08-31 12:36:43 | D | - Calibrating transformer_blocks.9.norm1_context.linear.weight quantizer +25-08-31 12:36:43 | D | - Calibrating transformer_blocks.9.attn.to_q.weight quantizer +25-08-31 12:36:44 | D | - Calibrating transformer_blocks.9.attn.to_k.weight quantizer +25-08-31 12:36:44 | D | - Calibrating transformer_blocks.9.attn.to_v.weight quantizer +25-08-31 12:36:44 | D | - Calibrating transformer_blocks.9.attn.add_q_proj.weight quantizer +25-08-31 12:36:44 | D | - Calibrating transformer_blocks.9.attn.add_k_proj.weight quantizer +25-08-31 12:36:44 | D | - Calibrating transformer_blocks.9.attn.add_v_proj.weight quantizer +25-08-31 12:36:45 | D | - Calibrating transformer_blocks.9.attn.to_out.0.weight quantizer +25-08-31 12:36:45 | D | - Calibrating transformer_blocks.9.attn.to_add_out.weight quantizer +25-08-31 12:36:45 | D | - Calibrating transformer_blocks.9.ff.net.0.proj.weight quantizer +25-08-31 12:36:45 | D | - Calibrating transformer_blocks.9.ff.net.2.weight quantizer +25-08-31 12:36:45 | D | - Calibrating transformer_blocks.9.ff_context.net.0.proj.weight quantizer +25-08-31 12:36:45 | D | - Calibrating transformer_blocks.9.ff_context.net.2.weight quantizer +25-08-31 12:36:46 | D | - Calibrating weights: block transformer_blocks.10 +25-08-31 12:36:46 | D | - Calibrating transformer_blocks.10.norm1.linear.weight quantizer +25-08-31 12:36:46 | D | - Calibrating transformer_blocks.10.norm1_context.linear.weight quantizer +25-08-31 12:36:46 | D | - Calibrating transformer_blocks.10.attn.to_q.weight quantizer +25-08-31 12:36:46 | D | - Calibrating transformer_blocks.10.attn.to_k.weight quantizer +25-08-31 12:36:46 | D | - Calibrating transformer_blocks.10.attn.to_v.weight quantizer +25-08-31 12:36:47 | D | - Calibrating transformer_blocks.10.attn.add_q_proj.weight quantizer +25-08-31 12:36:47 | D | - Calibrating transformer_blocks.10.attn.add_k_proj.weight quantizer +25-08-31 12:36:47 | D | - Calibrating transformer_blocks.10.attn.add_v_proj.weight quantizer +25-08-31 12:36:47 | D | - Calibrating transformer_blocks.10.attn.to_out.0.weight quantizer +25-08-31 12:36:47 | D | - Calibrating transformer_blocks.10.attn.to_add_out.weight quantizer +25-08-31 12:36:48 | D | - Calibrating transformer_blocks.10.ff.net.0.proj.weight quantizer +25-08-31 12:36:48 | D | - Calibrating transformer_blocks.10.ff.net.2.weight quantizer +25-08-31 12:36:48 | D | - Calibrating transformer_blocks.10.ff_context.net.0.proj.weight quantizer +25-08-31 12:36:48 | D | - Calibrating transformer_blocks.10.ff_context.net.2.weight quantizer +25-08-31 12:36:48 | D | - Calibrating weights: block transformer_blocks.11 +25-08-31 12:36:48 | D | - Calibrating transformer_blocks.11.norm1.linear.weight quantizer +25-08-31 12:36:49 | D | - Calibrating transformer_blocks.11.norm1_context.linear.weight quantizer +25-08-31 12:36:49 | D | - Calibrating transformer_blocks.11.attn.to_q.weight quantizer +25-08-31 12:36:49 | D | - Calibrating transformer_blocks.11.attn.to_k.weight quantizer +25-08-31 12:36:49 | D | - Calibrating transformer_blocks.11.attn.to_v.weight quantizer +25-08-31 12:36:49 | D | - Calibrating transformer_blocks.11.attn.add_q_proj.weight quantizer +25-08-31 12:36:49 | D | - Calibrating transformer_blocks.11.attn.add_k_proj.weight quantizer +25-08-31 12:36:50 | D | - Calibrating transformer_blocks.11.attn.add_v_proj.weight quantizer +25-08-31 12:36:50 | D | - Calibrating transformer_blocks.11.attn.to_out.0.weight quantizer +25-08-31 12:36:50 | D | - Calibrating transformer_blocks.11.attn.to_add_out.weight quantizer +25-08-31 12:36:50 | D | - Calibrating transformer_blocks.11.ff.net.0.proj.weight quantizer +25-08-31 12:36:50 | D | - Calibrating transformer_blocks.11.ff.net.2.weight quantizer +25-08-31 12:36:51 | D | - Calibrating transformer_blocks.11.ff_context.net.0.proj.weight quantizer +25-08-31 12:36:51 | D | - Calibrating transformer_blocks.11.ff_context.net.2.weight quantizer +25-08-31 12:36:51 | D | - Calibrating weights: block transformer_blocks.12 +25-08-31 12:36:51 | D | - Calibrating transformer_blocks.12.norm1.linear.weight quantizer +25-08-31 12:36:51 | D | - Calibrating transformer_blocks.12.norm1_context.linear.weight quantizer +25-08-31 12:36:51 | D | - Calibrating transformer_blocks.12.attn.to_q.weight quantizer +25-08-31 12:36:51 | D | - Calibrating transformer_blocks.12.attn.to_k.weight quantizer +25-08-31 12:36:52 | D | - Calibrating transformer_blocks.12.attn.to_v.weight quantizer +25-08-31 12:36:52 | D | - Calibrating transformer_blocks.12.attn.add_q_proj.weight quantizer +25-08-31 12:36:52 | D | - Calibrating transformer_blocks.12.attn.add_k_proj.weight quantizer +25-08-31 12:36:52 | D | - Calibrating transformer_blocks.12.attn.add_v_proj.weight quantizer +25-08-31 12:36:52 | D | - Calibrating transformer_blocks.12.attn.to_out.0.weight quantizer +25-08-31 12:36:53 | D | - Calibrating transformer_blocks.12.attn.to_add_out.weight quantizer +25-08-31 12:36:53 | D | - Calibrating transformer_blocks.12.ff.net.0.proj.weight quantizer +25-08-31 12:36:53 | D | - Calibrating transformer_blocks.12.ff.net.2.weight quantizer +25-08-31 12:36:53 | D | - Calibrating transformer_blocks.12.ff_context.net.0.proj.weight quantizer +25-08-31 12:36:53 | D | - Calibrating transformer_blocks.12.ff_context.net.2.weight quantizer +25-08-31 12:36:54 | D | - Calibrating weights: block transformer_blocks.13 +25-08-31 12:36:54 | D | - Calibrating transformer_blocks.13.norm1.linear.weight quantizer +25-08-31 12:36:54 | D | - Calibrating transformer_blocks.13.norm1_context.linear.weight quantizer +25-08-31 12:36:54 | D | - Calibrating transformer_blocks.13.attn.to_q.weight quantizer +25-08-31 12:36:54 | D | - Calibrating transformer_blocks.13.attn.to_k.weight quantizer +25-08-31 12:36:54 | D | - Calibrating transformer_blocks.13.attn.to_v.weight quantizer +25-08-31 12:36:54 | D | - Calibrating transformer_blocks.13.attn.add_q_proj.weight quantizer +25-08-31 12:36:55 | D | - Calibrating transformer_blocks.13.attn.add_k_proj.weight quantizer +25-08-31 12:36:55 | D | - Calibrating transformer_blocks.13.attn.add_v_proj.weight quantizer +25-08-31 12:36:55 | D | - Calibrating transformer_blocks.13.attn.to_out.0.weight quantizer +25-08-31 12:36:55 | D | - Calibrating transformer_blocks.13.attn.to_add_out.weight quantizer +25-08-31 12:36:55 | D | - Calibrating transformer_blocks.13.ff.net.0.proj.weight quantizer +25-08-31 12:36:56 | D | - Calibrating transformer_blocks.13.ff.net.2.weight quantizer +25-08-31 12:36:56 | D | - Calibrating transformer_blocks.13.ff_context.net.0.proj.weight quantizer +25-08-31 12:36:56 | D | - Calibrating transformer_blocks.13.ff_context.net.2.weight quantizer +25-08-31 12:36:56 | D | - Calibrating weights: block transformer_blocks.14 +25-08-31 12:36:56 | D | - Calibrating transformer_blocks.14.norm1.linear.weight quantizer +25-08-31 12:36:56 | D | - Calibrating transformer_blocks.14.norm1_context.linear.weight quantizer +25-08-31 12:36:57 | D | - Calibrating transformer_blocks.14.attn.to_q.weight quantizer +25-08-31 12:36:57 | D | - Calibrating transformer_blocks.14.attn.to_k.weight quantizer +25-08-31 12:36:57 | D | - Calibrating transformer_blocks.14.attn.to_v.weight quantizer +25-08-31 12:36:57 | D | - Calibrating transformer_blocks.14.attn.add_q_proj.weight quantizer +25-08-31 12:36:57 | D | - Calibrating transformer_blocks.14.attn.add_k_proj.weight quantizer +25-08-31 12:36:57 | D | - Calibrating transformer_blocks.14.attn.add_v_proj.weight quantizer +25-08-31 12:36:58 | D | - Calibrating transformer_blocks.14.attn.to_out.0.weight quantizer +25-08-31 12:36:58 | D | - Calibrating transformer_blocks.14.attn.to_add_out.weight quantizer +25-08-31 12:36:58 | D | - Calibrating transformer_blocks.14.ff.net.0.proj.weight quantizer +25-08-31 12:36:58 | D | - Calibrating transformer_blocks.14.ff.net.2.weight quantizer +25-08-31 12:36:58 | D | - Calibrating transformer_blocks.14.ff_context.net.0.proj.weight quantizer +25-08-31 12:36:59 | D | - Calibrating transformer_blocks.14.ff_context.net.2.weight quantizer +25-08-31 12:36:59 | D | - Calibrating weights: block transformer_blocks.15 +25-08-31 12:36:59 | D | - Calibrating transformer_blocks.15.norm1.linear.weight quantizer +25-08-31 12:36:59 | D | - Calibrating transformer_blocks.15.norm1_context.linear.weight quantizer +25-08-31 12:36:59 | D | - Calibrating transformer_blocks.15.attn.to_q.weight quantizer +25-08-31 12:36:59 | D | - Calibrating transformer_blocks.15.attn.to_k.weight quantizer +25-08-31 12:37:00 | D | - Calibrating transformer_blocks.15.attn.to_v.weight quantizer +25-08-31 12:37:00 | D | - Calibrating transformer_blocks.15.attn.add_q_proj.weight quantizer +25-08-31 12:37:00 | D | - Calibrating transformer_blocks.15.attn.add_k_proj.weight quantizer +25-08-31 12:37:00 | D | - Calibrating transformer_blocks.15.attn.add_v_proj.weight quantizer +25-08-31 12:37:00 | D | - Calibrating transformer_blocks.15.attn.to_out.0.weight quantizer +25-08-31 12:37:00 | D | - Calibrating transformer_blocks.15.attn.to_add_out.weight quantizer +25-08-31 12:37:01 | D | - Calibrating transformer_blocks.15.ff.net.0.proj.weight quantizer +25-08-31 12:37:01 | D | - Calibrating transformer_blocks.15.ff.net.2.weight quantizer +25-08-31 12:37:01 | D | - Calibrating transformer_blocks.15.ff_context.net.0.proj.weight quantizer +25-08-31 12:37:01 | D | - Calibrating transformer_blocks.15.ff_context.net.2.weight quantizer +25-08-31 12:37:01 | D | - Calibrating weights: block transformer_blocks.16 +25-08-31 12:37:01 | D | - Calibrating transformer_blocks.16.norm1.linear.weight quantizer +25-08-31 12:37:02 | D | - Calibrating transformer_blocks.16.norm1_context.linear.weight quantizer +25-08-31 12:37:02 | D | - Calibrating transformer_blocks.16.attn.to_q.weight quantizer +25-08-31 12:37:02 | D | - Calibrating transformer_blocks.16.attn.to_k.weight quantizer +25-08-31 12:37:02 | D | - Calibrating transformer_blocks.16.attn.to_v.weight quantizer +25-08-31 12:37:02 | D | - Calibrating transformer_blocks.16.attn.add_q_proj.weight quantizer +25-08-31 12:37:03 | D | - Calibrating transformer_blocks.16.attn.add_k_proj.weight quantizer +25-08-31 12:37:03 | D | - Calibrating transformer_blocks.16.attn.add_v_proj.weight quantizer +25-08-31 12:37:03 | D | - Calibrating transformer_blocks.16.attn.to_out.0.weight quantizer +25-08-31 12:37:03 | D | - Calibrating transformer_blocks.16.attn.to_add_out.weight quantizer +25-08-31 12:37:03 | D | - Calibrating transformer_blocks.16.ff.net.0.proj.weight quantizer +25-08-31 12:37:03 | D | - Calibrating transformer_blocks.16.ff.net.2.weight quantizer +25-08-31 12:37:04 | D | - Calibrating transformer_blocks.16.ff_context.net.0.proj.weight quantizer +25-08-31 12:37:04 | D | - Calibrating transformer_blocks.16.ff_context.net.2.weight quantizer +25-08-31 12:37:04 | D | - Calibrating weights: block transformer_blocks.17 +25-08-31 12:37:04 | D | - Calibrating transformer_blocks.17.norm1.linear.weight quantizer +25-08-31 12:37:04 | D | - Calibrating transformer_blocks.17.norm1_context.linear.weight quantizer +25-08-31 12:37:04 | D | - Calibrating transformer_blocks.17.attn.to_q.weight quantizer +25-08-31 12:37:05 | D | - Calibrating transformer_blocks.17.attn.to_k.weight quantizer +25-08-31 12:37:05 | D | - Calibrating transformer_blocks.17.attn.to_v.weight quantizer +25-08-31 12:37:05 | D | - Calibrating transformer_blocks.17.attn.add_q_proj.weight quantizer +25-08-31 12:37:05 | D | - Calibrating transformer_blocks.17.attn.add_k_proj.weight quantizer +25-08-31 12:37:05 | D | - Calibrating transformer_blocks.17.attn.add_v_proj.weight quantizer +25-08-31 12:37:06 | D | - Calibrating transformer_blocks.17.attn.to_out.0.weight quantizer +25-08-31 12:37:06 | D | - Calibrating transformer_blocks.17.attn.to_add_out.weight quantizer +25-08-31 12:37:06 | D | - Calibrating transformer_blocks.17.ff.net.0.proj.weight quantizer +25-08-31 12:37:06 | D | - Calibrating transformer_blocks.17.ff.net.2.weight quantizer +25-08-31 12:37:06 | D | - Calibrating transformer_blocks.17.ff_context.net.0.proj.weight quantizer +25-08-31 12:37:06 | D | - Calibrating transformer_blocks.17.ff_context.net.2.weight quantizer +25-08-31 12:37:07 | D | - Calibrating weights: block transformer_blocks.18 +25-08-31 12:37:07 | D | - Calibrating transformer_blocks.18.norm1.linear.weight quantizer +25-08-31 12:37:07 | D | - Calibrating transformer_blocks.18.norm1_context.linear.weight quantizer +25-08-31 12:37:07 | D | - Calibrating transformer_blocks.18.attn.to_q.weight quantizer +25-08-31 12:37:07 | D | - Calibrating transformer_blocks.18.attn.to_k.weight quantizer +25-08-31 12:37:07 | D | - Calibrating transformer_blocks.18.attn.to_v.weight quantizer +25-08-31 12:37:08 | D | - Calibrating transformer_blocks.18.attn.add_q_proj.weight quantizer +25-08-31 12:37:08 | D | - Calibrating transformer_blocks.18.attn.add_k_proj.weight quantizer +25-08-31 12:37:08 | D | - Calibrating transformer_blocks.18.attn.add_v_proj.weight quantizer +25-08-31 12:37:08 | D | - Calibrating transformer_blocks.18.attn.to_out.0.weight quantizer +25-08-31 12:37:08 | D | - Calibrating transformer_blocks.18.attn.to_add_out.weight quantizer +25-08-31 12:37:08 | D | - Calibrating transformer_blocks.18.ff.net.0.proj.weight quantizer +25-08-31 12:37:09 | D | - Calibrating transformer_blocks.18.ff.net.2.weight quantizer +25-08-31 12:37:09 | D | - Calibrating transformer_blocks.18.ff_context.net.0.proj.weight quantizer +25-08-31 12:37:09 | D | - Calibrating transformer_blocks.18.ff_context.net.2.weight quantizer +25-08-31 12:37:09 | D | - Calibrating weights: block single_transformer_blocks.0 +25-08-31 12:37:09 | D | - Calibrating single_transformer_blocks.0.norm.linear.weight quantizer +25-08-31 12:37:09 | D | - Calibrating single_transformer_blocks.0.attn.to_q.weight quantizer +25-08-31 12:37:10 | D | - Calibrating single_transformer_blocks.0.attn.to_k.weight quantizer +25-08-31 12:37:10 | D | - Calibrating single_transformer_blocks.0.attn.to_v.weight quantizer +25-08-31 12:37:10 | D | - Calibrating single_transformer_blocks.0.proj_out.linears.0.weight quantizer +25-08-31 12:37:10 | D | - Calibrating single_transformer_blocks.0.proj_mlp.weight quantizer +25-08-31 12:37:10 | D | - Calibrating single_transformer_blocks.0.proj_out.linears.1.weight quantizer +25-08-31 12:37:11 | D | - Calibrating weights: block single_transformer_blocks.1 +25-08-31 12:37:11 | D | - Calibrating single_transformer_blocks.1.norm.linear.weight quantizer +25-08-31 12:37:11 | D | - Calibrating single_transformer_blocks.1.attn.to_q.weight quantizer +25-08-31 12:37:11 | D | - Calibrating single_transformer_blocks.1.attn.to_k.weight quantizer +25-08-31 12:37:11 | D | - Calibrating single_transformer_blocks.1.attn.to_v.weight quantizer +25-08-31 12:37:11 | D | - Calibrating single_transformer_blocks.1.proj_out.linears.0.weight quantizer +25-08-31 12:37:12 | D | - Calibrating single_transformer_blocks.1.proj_mlp.weight quantizer +25-08-31 12:37:12 | D | - Calibrating single_transformer_blocks.1.proj_out.linears.1.weight quantizer +25-08-31 12:37:12 | D | - Calibrating weights: block single_transformer_blocks.2 +25-08-31 12:37:12 | D | - Calibrating single_transformer_blocks.2.norm.linear.weight quantizer +25-08-31 12:37:12 | D | - Calibrating single_transformer_blocks.2.attn.to_q.weight quantizer +25-08-31 12:37:12 | D | - Calibrating single_transformer_blocks.2.attn.to_k.weight quantizer +25-08-31 12:37:12 | D | - Calibrating single_transformer_blocks.2.attn.to_v.weight quantizer +25-08-31 12:37:13 | D | - Calibrating single_transformer_blocks.2.proj_out.linears.0.weight quantizer +25-08-31 12:37:13 | D | - Calibrating single_transformer_blocks.2.proj_mlp.weight quantizer +25-08-31 12:37:13 | D | - Calibrating single_transformer_blocks.2.proj_out.linears.1.weight quantizer +25-08-31 12:37:13 | D | - Calibrating weights: block single_transformer_blocks.3 +25-08-31 12:37:13 | D | - Calibrating single_transformer_blocks.3.norm.linear.weight quantizer +25-08-31 12:37:13 | D | - Calibrating single_transformer_blocks.3.attn.to_q.weight quantizer +25-08-31 12:37:14 | D | - Calibrating single_transformer_blocks.3.attn.to_k.weight quantizer +25-08-31 12:37:14 | D | - Calibrating single_transformer_blocks.3.attn.to_v.weight quantizer +25-08-31 12:37:14 | D | - Calibrating single_transformer_blocks.3.proj_out.linears.0.weight quantizer +25-08-31 12:37:14 | D | - Calibrating single_transformer_blocks.3.proj_mlp.weight quantizer +25-08-31 12:37:14 | D | - Calibrating single_transformer_blocks.3.proj_out.linears.1.weight quantizer +25-08-31 12:37:14 | D | - Calibrating weights: block single_transformer_blocks.4 +25-08-31 12:37:14 | D | - Calibrating single_transformer_blocks.4.norm.linear.weight quantizer +25-08-31 12:37:15 | D | - Calibrating single_transformer_blocks.4.attn.to_q.weight quantizer +25-08-31 12:37:15 | D | - Calibrating single_transformer_blocks.4.attn.to_k.weight quantizer +25-08-31 12:37:15 | D | - Calibrating single_transformer_blocks.4.attn.to_v.weight quantizer +25-08-31 12:37:15 | D | - Calibrating single_transformer_blocks.4.proj_out.linears.0.weight quantizer +25-08-31 12:37:15 | D | - Calibrating single_transformer_blocks.4.proj_mlp.weight quantizer +25-08-31 12:37:16 | D | - Calibrating single_transformer_blocks.4.proj_out.linears.1.weight quantizer +25-08-31 12:37:16 | D | - Calibrating weights: block single_transformer_blocks.5 +25-08-31 12:37:16 | D | - Calibrating single_transformer_blocks.5.norm.linear.weight quantizer +25-08-31 12:37:16 | D | - Calibrating single_transformer_blocks.5.attn.to_q.weight quantizer +25-08-31 12:37:16 | D | - Calibrating single_transformer_blocks.5.attn.to_k.weight quantizer +25-08-31 12:37:16 | D | - Calibrating single_transformer_blocks.5.attn.to_v.weight quantizer +25-08-31 12:37:16 | D | - Calibrating single_transformer_blocks.5.proj_out.linears.0.weight quantizer +25-08-31 12:37:17 | D | - Calibrating single_transformer_blocks.5.proj_mlp.weight quantizer +25-08-31 12:37:17 | D | - Calibrating single_transformer_blocks.5.proj_out.linears.1.weight quantizer +25-08-31 12:37:17 | D | - Calibrating weights: block single_transformer_blocks.6 +25-08-31 12:37:17 | D | - Calibrating single_transformer_blocks.6.norm.linear.weight quantizer +25-08-31 12:37:17 | D | - Calibrating single_transformer_blocks.6.attn.to_q.weight quantizer +25-08-31 12:37:17 | D | - Calibrating single_transformer_blocks.6.attn.to_k.weight quantizer +25-08-31 12:37:18 | D | - Calibrating single_transformer_blocks.6.attn.to_v.weight quantizer +25-08-31 12:37:18 | D | - Calibrating single_transformer_blocks.6.proj_out.linears.0.weight quantizer +25-08-31 12:37:18 | D | - Calibrating single_transformer_blocks.6.proj_mlp.weight quantizer +25-08-31 12:37:18 | D | - Calibrating single_transformer_blocks.6.proj_out.linears.1.weight quantizer +25-08-31 12:37:18 | D | - Calibrating weights: block single_transformer_blocks.7 +25-08-31 12:37:18 | D | - Calibrating single_transformer_blocks.7.norm.linear.weight quantizer +25-08-31 12:37:19 | D | - Calibrating single_transformer_blocks.7.attn.to_q.weight quantizer +25-08-31 12:37:19 | D | - Calibrating single_transformer_blocks.7.attn.to_k.weight quantizer +25-08-31 12:37:19 | D | - Calibrating single_transformer_blocks.7.attn.to_v.weight quantizer +25-08-31 12:37:19 | D | - Calibrating single_transformer_blocks.7.proj_out.linears.0.weight quantizer +25-08-31 12:37:19 | D | - Calibrating single_transformer_blocks.7.proj_mlp.weight quantizer +25-08-31 12:37:20 | D | - Calibrating single_transformer_blocks.7.proj_out.linears.1.weight quantizer +25-08-31 12:37:20 | D | - Calibrating weights: block single_transformer_blocks.8 +25-08-31 12:37:20 | D | - Calibrating single_transformer_blocks.8.norm.linear.weight quantizer +25-08-31 12:37:20 | D | - Calibrating single_transformer_blocks.8.attn.to_q.weight quantizer +25-08-31 12:37:20 | D | - Calibrating single_transformer_blocks.8.attn.to_k.weight quantizer +25-08-31 12:37:20 | D | - Calibrating single_transformer_blocks.8.attn.to_v.weight quantizer +25-08-31 12:37:20 | D | - Calibrating single_transformer_blocks.8.proj_out.linears.0.weight quantizer +25-08-31 12:37:21 | D | - Calibrating single_transformer_blocks.8.proj_mlp.weight quantizer +25-08-31 12:37:21 | D | - Calibrating single_transformer_blocks.8.proj_out.linears.1.weight quantizer +25-08-31 12:37:21 | D | - Calibrating weights: block single_transformer_blocks.9 +25-08-31 12:37:21 | D | - Calibrating single_transformer_blocks.9.norm.linear.weight quantizer +25-08-31 12:37:21 | D | - Calibrating single_transformer_blocks.9.attn.to_q.weight quantizer +25-08-31 12:37:21 | D | - Calibrating single_transformer_blocks.9.attn.to_k.weight quantizer +25-08-31 12:37:22 | D | - Calibrating single_transformer_blocks.9.attn.to_v.weight quantizer +25-08-31 12:37:22 | D | - Calibrating single_transformer_blocks.9.proj_out.linears.0.weight quantizer +25-08-31 12:37:22 | D | - Calibrating single_transformer_blocks.9.proj_mlp.weight quantizer +25-08-31 12:37:22 | D | - Calibrating single_transformer_blocks.9.proj_out.linears.1.weight quantizer +25-08-31 12:37:22 | D | - Calibrating weights: block single_transformer_blocks.10 +25-08-31 12:37:22 | D | - Calibrating single_transformer_blocks.10.norm.linear.weight quantizer +25-08-31 12:37:22 | D | - Calibrating single_transformer_blocks.10.attn.to_q.weight quantizer +25-08-31 12:37:23 | D | - Calibrating single_transformer_blocks.10.attn.to_k.weight quantizer +25-08-31 12:37:23 | D | - Calibrating single_transformer_blocks.10.attn.to_v.weight quantizer +25-08-31 12:37:23 | D | - Calibrating single_transformer_blocks.10.proj_out.linears.0.weight quantizer +25-08-31 12:37:23 | D | - Calibrating single_transformer_blocks.10.proj_mlp.weight quantizer +25-08-31 12:37:23 | D | - Calibrating single_transformer_blocks.10.proj_out.linears.1.weight quantizer +25-08-31 12:37:24 | D | - Calibrating weights: block single_transformer_blocks.11 +25-08-31 12:37:24 | D | - Calibrating single_transformer_blocks.11.norm.linear.weight quantizer +25-08-31 12:37:24 | D | - Calibrating single_transformer_blocks.11.attn.to_q.weight quantizer +25-08-31 12:37:24 | D | - Calibrating single_transformer_blocks.11.attn.to_k.weight quantizer +25-08-31 12:37:24 | D | - Calibrating single_transformer_blocks.11.attn.to_v.weight quantizer +25-08-31 12:37:24 | D | - Calibrating single_transformer_blocks.11.proj_out.linears.0.weight quantizer +25-08-31 12:37:25 | D | - Calibrating single_transformer_blocks.11.proj_mlp.weight quantizer +25-08-31 12:37:25 | D | - Calibrating single_transformer_blocks.11.proj_out.linears.1.weight quantizer +25-08-31 12:37:25 | D | - Calibrating weights: block single_transformer_blocks.12 +25-08-31 12:37:25 | D | - Calibrating single_transformer_blocks.12.norm.linear.weight quantizer +25-08-31 12:37:25 | D | - Calibrating single_transformer_blocks.12.attn.to_q.weight quantizer +25-08-31 12:37:25 | D | - Calibrating single_transformer_blocks.12.attn.to_k.weight quantizer +25-08-31 12:37:25 | D | - Calibrating single_transformer_blocks.12.attn.to_v.weight quantizer +25-08-31 12:37:26 | D | - Calibrating single_transformer_blocks.12.proj_out.linears.0.weight quantizer +25-08-31 12:37:26 | D | - Calibrating single_transformer_blocks.12.proj_mlp.weight quantizer +25-08-31 12:37:26 | D | - Calibrating single_transformer_blocks.12.proj_out.linears.1.weight quantizer +25-08-31 12:37:26 | D | - Calibrating weights: block single_transformer_blocks.13 +25-08-31 12:37:26 | D | - Calibrating single_transformer_blocks.13.norm.linear.weight quantizer +25-08-31 12:37:26 | D | - Calibrating single_transformer_blocks.13.attn.to_q.weight quantizer +25-08-31 12:37:27 | D | - Calibrating single_transformer_blocks.13.attn.to_k.weight quantizer +25-08-31 12:37:27 | D | - Calibrating single_transformer_blocks.13.attn.to_v.weight quantizer +25-08-31 12:37:27 | D | - Calibrating single_transformer_blocks.13.proj_out.linears.0.weight quantizer +25-08-31 12:37:27 | D | - Calibrating single_transformer_blocks.13.proj_mlp.weight quantizer +25-08-31 12:37:27 | D | - Calibrating single_transformer_blocks.13.proj_out.linears.1.weight quantizer +25-08-31 12:37:28 | D | - Calibrating weights: block single_transformer_blocks.14 +25-08-31 12:37:28 | D | - Calibrating single_transformer_blocks.14.norm.linear.weight quantizer +25-08-31 12:37:28 | D | - Calibrating single_transformer_blocks.14.attn.to_q.weight quantizer +25-08-31 12:37:28 | D | - Calibrating single_transformer_blocks.14.attn.to_k.weight quantizer +25-08-31 12:37:28 | D | - Calibrating single_transformer_blocks.14.attn.to_v.weight quantizer +25-08-31 12:37:28 | D | - Calibrating single_transformer_blocks.14.proj_out.linears.0.weight quantizer +25-08-31 12:37:28 | D | - Calibrating single_transformer_blocks.14.proj_mlp.weight quantizer +25-08-31 12:37:29 | D | - Calibrating single_transformer_blocks.14.proj_out.linears.1.weight quantizer +25-08-31 12:37:29 | D | - Calibrating weights: block single_transformer_blocks.15 +25-08-31 12:37:29 | D | - Calibrating single_transformer_blocks.15.norm.linear.weight quantizer +25-08-31 12:37:29 | D | - Calibrating single_transformer_blocks.15.attn.to_q.weight quantizer +25-08-31 12:37:29 | D | - Calibrating single_transformer_blocks.15.attn.to_k.weight quantizer +25-08-31 12:37:29 | D | - Calibrating single_transformer_blocks.15.attn.to_v.weight quantizer +25-08-31 12:37:30 | D | - Calibrating single_transformer_blocks.15.proj_out.linears.0.weight quantizer +25-08-31 12:37:30 | D | - Calibrating single_transformer_blocks.15.proj_mlp.weight quantizer +25-08-31 12:37:30 | D | - Calibrating single_transformer_blocks.15.proj_out.linears.1.weight quantizer +25-08-31 12:37:30 | D | - Calibrating weights: block single_transformer_blocks.16 +25-08-31 12:37:30 | D | - Calibrating single_transformer_blocks.16.norm.linear.weight quantizer +25-08-31 12:37:30 | D | - Calibrating single_transformer_blocks.16.attn.to_q.weight quantizer +25-08-31 12:37:31 | D | - Calibrating single_transformer_blocks.16.attn.to_k.weight quantizer +25-08-31 12:37:31 | D | - Calibrating single_transformer_blocks.16.attn.to_v.weight quantizer +25-08-31 12:37:31 | D | - Calibrating single_transformer_blocks.16.proj_out.linears.0.weight quantizer +25-08-31 12:37:31 | D | - Calibrating single_transformer_blocks.16.proj_mlp.weight quantizer +25-08-31 12:37:31 | D | - Calibrating single_transformer_blocks.16.proj_out.linears.1.weight quantizer +25-08-31 12:37:31 | D | - Calibrating weights: block single_transformer_blocks.17 +25-08-31 12:37:31 | D | - Calibrating single_transformer_blocks.17.norm.linear.weight quantizer +25-08-31 12:37:32 | D | - Calibrating single_transformer_blocks.17.attn.to_q.weight quantizer +25-08-31 12:37:32 | D | - Calibrating single_transformer_blocks.17.attn.to_k.weight quantizer +25-08-31 12:37:32 | D | - Calibrating single_transformer_blocks.17.attn.to_v.weight quantizer +25-08-31 12:37:32 | D | - Calibrating single_transformer_blocks.17.proj_out.linears.0.weight quantizer +25-08-31 12:37:32 | D | - Calibrating single_transformer_blocks.17.proj_mlp.weight quantizer +25-08-31 12:37:33 | D | - Calibrating single_transformer_blocks.17.proj_out.linears.1.weight quantizer +25-08-31 12:37:33 | D | - Calibrating weights: block single_transformer_blocks.18 +25-08-31 12:37:33 | D | - Calibrating single_transformer_blocks.18.norm.linear.weight quantizer +25-08-31 12:37:33 | D | - Calibrating single_transformer_blocks.18.attn.to_q.weight quantizer +25-08-31 12:37:33 | D | - Calibrating single_transformer_blocks.18.attn.to_k.weight quantizer +25-08-31 12:37:33 | D | - Calibrating single_transformer_blocks.18.attn.to_v.weight quantizer +25-08-31 12:37:34 | D | - Calibrating single_transformer_blocks.18.proj_out.linears.0.weight quantizer +25-08-31 12:37:34 | D | - Calibrating single_transformer_blocks.18.proj_mlp.weight quantizer +25-08-31 12:37:34 | D | - Calibrating single_transformer_blocks.18.proj_out.linears.1.weight quantizer +25-08-31 12:37:34 | D | - Calibrating weights: block single_transformer_blocks.19 +25-08-31 12:37:34 | D | - Calibrating single_transformer_blocks.19.norm.linear.weight quantizer +25-08-31 12:37:34 | D | - Calibrating single_transformer_blocks.19.attn.to_q.weight quantizer +25-08-31 12:37:34 | D | - Calibrating single_transformer_blocks.19.attn.to_k.weight quantizer +25-08-31 12:37:35 | D | - Calibrating single_transformer_blocks.19.attn.to_v.weight quantizer +25-08-31 12:37:35 | D | - Calibrating single_transformer_blocks.19.proj_out.linears.0.weight quantizer +25-08-31 12:37:35 | D | - Calibrating single_transformer_blocks.19.proj_mlp.weight quantizer +25-08-31 12:37:35 | D | - Calibrating single_transformer_blocks.19.proj_out.linears.1.weight quantizer +25-08-31 12:37:35 | D | - Calibrating weights: block single_transformer_blocks.20 +25-08-31 12:37:35 | D | - Calibrating single_transformer_blocks.20.norm.linear.weight quantizer +25-08-31 12:37:36 | D | - Calibrating single_transformer_blocks.20.attn.to_q.weight quantizer +25-08-31 12:37:36 | D | - Calibrating single_transformer_blocks.20.attn.to_k.weight quantizer +25-08-31 12:37:36 | D | - Calibrating single_transformer_blocks.20.attn.to_v.weight quantizer +25-08-31 12:37:36 | D | - Calibrating single_transformer_blocks.20.proj_out.linears.0.weight quantizer +25-08-31 12:37:36 | D | - Calibrating single_transformer_blocks.20.proj_mlp.weight quantizer +25-08-31 12:37:37 | D | - Calibrating single_transformer_blocks.20.proj_out.linears.1.weight quantizer +25-08-31 12:37:37 | D | - Calibrating weights: block single_transformer_blocks.21 +25-08-31 12:37:37 | D | - Calibrating single_transformer_blocks.21.norm.linear.weight quantizer +25-08-31 12:37:37 | D | - Calibrating single_transformer_blocks.21.attn.to_q.weight quantizer +25-08-31 12:37:37 | D | - Calibrating single_transformer_blocks.21.attn.to_k.weight quantizer +25-08-31 12:37:37 | D | - Calibrating single_transformer_blocks.21.attn.to_v.weight quantizer +25-08-31 12:37:37 | D | - Calibrating single_transformer_blocks.21.proj_out.linears.0.weight quantizer +25-08-31 12:37:38 | D | - Calibrating single_transformer_blocks.21.proj_mlp.weight quantizer +25-08-31 12:37:38 | D | - Calibrating single_transformer_blocks.21.proj_out.linears.1.weight quantizer +25-08-31 12:37:38 | D | - Calibrating weights: block single_transformer_blocks.22 +25-08-31 12:37:38 | D | - Calibrating single_transformer_blocks.22.norm.linear.weight quantizer +25-08-31 12:37:38 | D | - Calibrating single_transformer_blocks.22.attn.to_q.weight quantizer +25-08-31 12:37:38 | D | - Calibrating single_transformer_blocks.22.attn.to_k.weight quantizer +25-08-31 12:37:39 | D | - Calibrating single_transformer_blocks.22.attn.to_v.weight quantizer +25-08-31 12:37:39 | D | - Calibrating single_transformer_blocks.22.proj_out.linears.0.weight quantizer +25-08-31 12:37:39 | D | - Calibrating single_transformer_blocks.22.proj_mlp.weight quantizer +25-08-31 12:37:39 | D | - Calibrating single_transformer_blocks.22.proj_out.linears.1.weight quantizer +25-08-31 12:37:39 | D | - Calibrating weights: block single_transformer_blocks.23 +25-08-31 12:37:39 | D | - Calibrating single_transformer_blocks.23.norm.linear.weight quantizer +25-08-31 12:37:40 | D | - Calibrating single_transformer_blocks.23.attn.to_q.weight quantizer +25-08-31 12:37:40 | D | - Calibrating single_transformer_blocks.23.attn.to_k.weight quantizer +25-08-31 12:37:40 | D | - Calibrating single_transformer_blocks.23.attn.to_v.weight quantizer +25-08-31 12:37:40 | D | - Calibrating single_transformer_blocks.23.proj_out.linears.0.weight quantizer +25-08-31 12:37:40 | D | - Calibrating single_transformer_blocks.23.proj_mlp.weight quantizer +25-08-31 12:37:40 | D | - Calibrating single_transformer_blocks.23.proj_out.linears.1.weight quantizer +25-08-31 12:37:41 | D | - Calibrating weights: block single_transformer_blocks.24 +25-08-31 12:37:41 | D | - Calibrating single_transformer_blocks.24.norm.linear.weight quantizer +25-08-31 12:37:41 | D | - Calibrating single_transformer_blocks.24.attn.to_q.weight quantizer +25-08-31 12:37:41 | D | - Calibrating single_transformer_blocks.24.attn.to_k.weight quantizer +25-08-31 12:37:41 | D | - Calibrating single_transformer_blocks.24.attn.to_v.weight quantizer +25-08-31 12:37:41 | D | - Calibrating single_transformer_blocks.24.proj_out.linears.0.weight quantizer +25-08-31 12:37:42 | D | - Calibrating single_transformer_blocks.24.proj_mlp.weight quantizer +25-08-31 12:37:42 | D | - Calibrating single_transformer_blocks.24.proj_out.linears.1.weight quantizer +25-08-31 12:37:42 | D | - Calibrating weights: block single_transformer_blocks.25 +25-08-31 12:37:42 | D | - Calibrating single_transformer_blocks.25.norm.linear.weight quantizer +25-08-31 12:37:42 | D | - Calibrating single_transformer_blocks.25.attn.to_q.weight quantizer +25-08-31 12:37:42 | D | - Calibrating single_transformer_blocks.25.attn.to_k.weight quantizer +25-08-31 12:37:43 | D | - Calibrating single_transformer_blocks.25.attn.to_v.weight quantizer +25-08-31 12:37:43 | D | - Calibrating single_transformer_blocks.25.proj_out.linears.0.weight quantizer +25-08-31 12:37:43 | D | - Calibrating single_transformer_blocks.25.proj_mlp.weight quantizer +25-08-31 12:37:43 | D | - Calibrating single_transformer_blocks.25.proj_out.linears.1.weight quantizer +25-08-31 12:37:43 | D | - Calibrating weights: block single_transformer_blocks.26 +25-08-31 12:37:43 | D | - Calibrating single_transformer_blocks.26.norm.linear.weight quantizer +25-08-31 12:37:43 | D | - Calibrating single_transformer_blocks.26.attn.to_q.weight quantizer +25-08-31 12:37:44 | D | - Calibrating single_transformer_blocks.26.attn.to_k.weight quantizer +25-08-31 12:37:44 | D | - Calibrating single_transformer_blocks.26.attn.to_v.weight quantizer +25-08-31 12:37:44 | D | - Calibrating single_transformer_blocks.26.proj_out.linears.0.weight quantizer +25-08-31 12:37:44 | D | - Calibrating single_transformer_blocks.26.proj_mlp.weight quantizer +25-08-31 12:37:44 | D | - Calibrating single_transformer_blocks.26.proj_out.linears.1.weight quantizer +25-08-31 12:37:45 | D | - Calibrating weights: block single_transformer_blocks.27 +25-08-31 12:37:45 | D | - Calibrating single_transformer_blocks.27.norm.linear.weight quantizer +25-08-31 12:37:45 | D | - Calibrating single_transformer_blocks.27.attn.to_q.weight quantizer +25-08-31 12:37:45 | D | - Calibrating single_transformer_blocks.27.attn.to_k.weight quantizer +25-08-31 12:37:45 | D | - Calibrating single_transformer_blocks.27.attn.to_v.weight quantizer +25-08-31 12:37:45 | D | - Calibrating single_transformer_blocks.27.proj_out.linears.0.weight quantizer +25-08-31 12:37:45 | D | - Calibrating single_transformer_blocks.27.proj_mlp.weight quantizer +25-08-31 12:37:46 | D | - Calibrating single_transformer_blocks.27.proj_out.linears.1.weight quantizer +25-08-31 12:37:46 | D | - Calibrating weights: block single_transformer_blocks.28 +25-08-31 12:37:46 | D | - Calibrating single_transformer_blocks.28.norm.linear.weight quantizer +25-08-31 12:37:46 | D | - Calibrating single_transformer_blocks.28.attn.to_q.weight quantizer +25-08-31 12:37:46 | D | - Calibrating single_transformer_blocks.28.attn.to_k.weight quantizer +25-08-31 12:37:46 | D | - Calibrating single_transformer_blocks.28.attn.to_v.weight quantizer +25-08-31 12:37:47 | D | - Calibrating single_transformer_blocks.28.proj_out.linears.0.weight quantizer +25-08-31 12:37:47 | D | - Calibrating single_transformer_blocks.28.proj_mlp.weight quantizer +25-08-31 12:37:47 | D | - Calibrating single_transformer_blocks.28.proj_out.linears.1.weight quantizer +25-08-31 12:37:47 | D | - Calibrating weights: block single_transformer_blocks.29 +25-08-31 12:37:47 | D | - Calibrating single_transformer_blocks.29.norm.linear.weight quantizer +25-08-31 12:37:47 | D | - Calibrating single_transformer_blocks.29.attn.to_q.weight quantizer +25-08-31 12:37:48 | D | - Calibrating single_transformer_blocks.29.attn.to_k.weight quantizer +25-08-31 12:37:48 | D | - Calibrating single_transformer_blocks.29.attn.to_v.weight quantizer +25-08-31 12:37:48 | D | - Calibrating single_transformer_blocks.29.proj_out.linears.0.weight quantizer +25-08-31 12:37:48 | D | - Calibrating single_transformer_blocks.29.proj_mlp.weight quantizer +25-08-31 12:37:48 | D | - Calibrating single_transformer_blocks.29.proj_out.linears.1.weight quantizer +25-08-31 12:37:49 | D | - Calibrating weights: block single_transformer_blocks.30 +25-08-31 12:37:49 | D | - Calibrating single_transformer_blocks.30.norm.linear.weight quantizer +25-08-31 12:37:49 | D | - Calibrating single_transformer_blocks.30.attn.to_q.weight quantizer +25-08-31 12:37:49 | D | - Calibrating single_transformer_blocks.30.attn.to_k.weight quantizer +25-08-31 12:37:49 | D | - Calibrating single_transformer_blocks.30.attn.to_v.weight quantizer +25-08-31 12:37:49 | D | - Calibrating single_transformer_blocks.30.proj_out.linears.0.weight quantizer +25-08-31 12:37:49 | D | - Calibrating single_transformer_blocks.30.proj_mlp.weight quantizer +25-08-31 12:37:50 | D | - Calibrating single_transformer_blocks.30.proj_out.linears.1.weight quantizer +25-08-31 12:37:50 | D | - Calibrating weights: block single_transformer_blocks.31 +25-08-31 12:37:50 | D | - Calibrating single_transformer_blocks.31.norm.linear.weight quantizer +25-08-31 12:37:50 | D | - Calibrating single_transformer_blocks.31.attn.to_q.weight quantizer +25-08-31 12:37:50 | D | - Calibrating single_transformer_blocks.31.attn.to_k.weight quantizer +25-08-31 12:37:50 | D | - Calibrating single_transformer_blocks.31.attn.to_v.weight quantizer +25-08-31 12:37:51 | D | - Calibrating single_transformer_blocks.31.proj_out.linears.0.weight quantizer +25-08-31 12:37:51 | D | - Calibrating single_transformer_blocks.31.proj_mlp.weight quantizer +25-08-31 12:37:51 | D | - Calibrating single_transformer_blocks.31.proj_out.linears.1.weight quantizer +25-08-31 12:37:51 | D | - Calibrating weights: block single_transformer_blocks.32 +25-08-31 12:37:51 | D | - Calibrating single_transformer_blocks.32.norm.linear.weight quantizer +25-08-31 12:37:51 | D | - Calibrating single_transformer_blocks.32.attn.to_q.weight quantizer +25-08-31 12:37:51 | D | - Calibrating single_transformer_blocks.32.attn.to_k.weight quantizer +25-08-31 12:37:52 | D | - Calibrating single_transformer_blocks.32.attn.to_v.weight quantizer +25-08-31 12:37:52 | D | - Calibrating single_transformer_blocks.32.proj_out.linears.0.weight quantizer +25-08-31 12:37:52 | D | - Calibrating single_transformer_blocks.32.proj_mlp.weight quantizer +25-08-31 12:37:52 | D | - Calibrating single_transformer_blocks.32.proj_out.linears.1.weight quantizer +25-08-31 12:37:52 | D | - Calibrating weights: block single_transformer_blocks.33 +25-08-31 12:37:52 | D | - Calibrating single_transformer_blocks.33.norm.linear.weight quantizer +25-08-31 12:37:53 | D | - Calibrating single_transformer_blocks.33.attn.to_q.weight quantizer +25-08-31 12:37:53 | D | - Calibrating single_transformer_blocks.33.attn.to_k.weight quantizer +25-08-31 12:37:53 | D | - Calibrating single_transformer_blocks.33.attn.to_v.weight quantizer +25-08-31 12:37:53 | D | - Calibrating single_transformer_blocks.33.proj_out.linears.0.weight quantizer +25-08-31 12:37:53 | D | - Calibrating single_transformer_blocks.33.proj_mlp.weight quantizer +25-08-31 12:37:54 | D | - Calibrating single_transformer_blocks.33.proj_out.linears.1.weight quantizer +25-08-31 12:37:54 | D | - Calibrating weights: block single_transformer_blocks.34 +25-08-31 12:37:54 | D | - Calibrating single_transformer_blocks.34.norm.linear.weight quantizer +25-08-31 12:37:54 | D | - Calibrating single_transformer_blocks.34.attn.to_q.weight quantizer +25-08-31 12:37:54 | D | - Calibrating single_transformer_blocks.34.attn.to_k.weight quantizer +25-08-31 12:37:54 | D | - Calibrating single_transformer_blocks.34.attn.to_v.weight quantizer +25-08-31 12:37:54 | D | - Calibrating single_transformer_blocks.34.proj_out.linears.0.weight quantizer +25-08-31 12:37:55 | D | - Calibrating single_transformer_blocks.34.proj_mlp.weight quantizer +25-08-31 12:37:55 | D | - Calibrating single_transformer_blocks.34.proj_out.linears.1.weight quantizer +25-08-31 12:37:55 | D | - Calibrating weights: block single_transformer_blocks.35 +25-08-31 12:37:55 | D | - Calibrating single_transformer_blocks.35.norm.linear.weight quantizer +25-08-31 12:37:55 | D | - Calibrating single_transformer_blocks.35.attn.to_q.weight quantizer +25-08-31 12:37:55 | D | - Calibrating single_transformer_blocks.35.attn.to_k.weight quantizer +25-08-31 12:37:56 | D | - Calibrating single_transformer_blocks.35.attn.to_v.weight quantizer +25-08-31 12:37:56 | D | - Calibrating single_transformer_blocks.35.proj_out.linears.0.weight quantizer +25-08-31 12:37:56 | D | - Calibrating single_transformer_blocks.35.proj_mlp.weight quantizer +25-08-31 12:37:56 | D | - Calibrating single_transformer_blocks.35.proj_out.linears.1.weight quantizer +25-08-31 12:37:56 | D | - Calibrating weights: block single_transformer_blocks.36 +25-08-31 12:37:56 | D | - Calibrating single_transformer_blocks.36.norm.linear.weight quantizer +25-08-31 12:37:57 | D | - Calibrating single_transformer_blocks.36.attn.to_q.weight quantizer +25-08-31 12:37:57 | D | - Calibrating single_transformer_blocks.36.attn.to_k.weight quantizer +25-08-31 12:37:57 | D | - Calibrating single_transformer_blocks.36.attn.to_v.weight quantizer +25-08-31 12:37:57 | D | - Calibrating single_transformer_blocks.36.proj_out.linears.0.weight quantizer +25-08-31 12:37:57 | D | - Calibrating single_transformer_blocks.36.proj_mlp.weight quantizer +25-08-31 12:37:57 | D | - Calibrating single_transformer_blocks.36.proj_out.linears.1.weight quantizer +25-08-31 12:37:58 | D | - Calibrating weights: block single_transformer_blocks.37 +25-08-31 12:37:58 | D | - Calibrating single_transformer_blocks.37.norm.linear.weight quantizer +25-08-31 12:37:58 | D | - Calibrating single_transformer_blocks.37.attn.to_q.weight quantizer +25-08-31 12:37:58 | D | - Calibrating single_transformer_blocks.37.attn.to_k.weight quantizer +25-08-31 12:37:58 | D | - Calibrating single_transformer_blocks.37.attn.to_v.weight quantizer +25-08-31 12:37:58 | D | - Calibrating single_transformer_blocks.37.proj_out.linears.0.weight quantizer +25-08-31 12:37:59 | D | - Calibrating single_transformer_blocks.37.proj_mlp.weight quantizer +25-08-31 12:37:59 | D | - Calibrating single_transformer_blocks.37.proj_out.linears.1.weight quantizer +25-08-31 12:37:59 | D | - Quantizing weights: block transformer_blocks.0 +25-08-31 12:37:59 | D | - Quantizing transformer_blocks.0.norm1.linear.weight +25-08-31 12:37:59 | D | + quant_dtype: sint4 +25-08-31 12:37:59 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:37:59 | D | + scale_dtype: (None,) +25-08-31 12:37:59 | D | - Quantizing transformer_blocks.0.norm1_context.linear.weight +25-08-31 12:37:59 | D | + quant_dtype: sint4 +25-08-31 12:37:59 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:37:59 | D | + scale_dtype: (None,) +25-08-31 12:37:59 | D | - Quantizing transformer_blocks.0.attn.to_q.weight +25-08-31 12:37:59 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:37:59 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:37:59 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:00 | D | - Quantizing transformer_blocks.0.attn.to_k.weight +25-08-31 12:38:00 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:00 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:00 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:00 | D | - Quantizing transformer_blocks.0.attn.to_v.weight +25-08-31 12:38:00 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:00 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:00 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:00 | D | - Quantizing transformer_blocks.0.attn.add_q_proj.weight +25-08-31 12:38:00 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:00 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:00 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:00 | D | - Quantizing transformer_blocks.0.attn.add_k_proj.weight +25-08-31 12:38:00 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:00 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:00 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:00 | D | - Quantizing transformer_blocks.0.attn.add_v_proj.weight +25-08-31 12:38:00 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:00 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:00 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:01 | D | - Quantizing transformer_blocks.0.attn.to_out.0.weight +25-08-31 12:38:01 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:01 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:01 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:01 | D | - Quantizing transformer_blocks.0.attn.to_add_out.weight +25-08-31 12:38:01 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:01 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:01 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:01 | D | - Quantizing transformer_blocks.0.ff.net.0.proj.weight +25-08-31 12:38:01 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:01 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:01 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:01 | D | - Quantizing transformer_blocks.0.ff.net.2.weight +25-08-31 12:38:01 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:01 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:01 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:01 | D | - Quantizing transformer_blocks.0.ff_context.net.0.proj.weight +25-08-31 12:38:01 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:01 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:01 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:01 | D | - Quantizing transformer_blocks.0.ff_context.net.2.weight +25-08-31 12:38:01 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:01 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:01 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:02 | D | - Quantizing weights: block transformer_blocks.1 +25-08-31 12:38:02 | D | - Quantizing transformer_blocks.1.norm1.linear.weight +25-08-31 12:38:02 | D | + quant_dtype: sint4 +25-08-31 12:38:02 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:02 | D | + scale_dtype: (None,) +25-08-31 12:38:02 | D | - Quantizing transformer_blocks.1.norm1_context.linear.weight +25-08-31 12:38:02 | D | + quant_dtype: sint4 +25-08-31 12:38:02 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:02 | D | + scale_dtype: (None,) +25-08-31 12:38:02 | D | - Quantizing transformer_blocks.1.attn.to_q.weight +25-08-31 12:38:02 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:02 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:02 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:02 | D | - Quantizing transformer_blocks.1.attn.to_k.weight +25-08-31 12:38:02 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:02 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:02 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:02 | D | - Quantizing transformer_blocks.1.attn.to_v.weight +25-08-31 12:38:02 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:02 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:02 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:03 | D | - Quantizing transformer_blocks.1.attn.add_q_proj.weight +25-08-31 12:38:03 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:03 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:03 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:03 | D | - Quantizing transformer_blocks.1.attn.add_k_proj.weight +25-08-31 12:38:03 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:03 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:03 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:03 | D | - Quantizing transformer_blocks.1.attn.add_v_proj.weight +25-08-31 12:38:03 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:03 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:03 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:03 | D | - Quantizing transformer_blocks.1.attn.to_out.0.weight +25-08-31 12:38:03 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:03 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:03 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:03 | D | - Quantizing transformer_blocks.1.attn.to_add_out.weight +25-08-31 12:38:03 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:03 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:03 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:04 | D | - Quantizing transformer_blocks.1.ff.net.0.proj.weight +25-08-31 12:38:04 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:04 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:04 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:04 | D | - Quantizing transformer_blocks.1.ff.net.2.weight +25-08-31 12:38:04 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:04 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:04 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:04 | D | - Quantizing transformer_blocks.1.ff_context.net.0.proj.weight +25-08-31 12:38:04 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:04 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:04 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:04 | D | - Quantizing transformer_blocks.1.ff_context.net.2.weight +25-08-31 12:38:04 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:04 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:04 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:04 | D | - Quantizing weights: block transformer_blocks.2 +25-08-31 12:38:04 | D | - Quantizing transformer_blocks.2.norm1.linear.weight +25-08-31 12:38:04 | D | + quant_dtype: sint4 +25-08-31 12:38:04 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:04 | D | + scale_dtype: (None,) +25-08-31 12:38:05 | D | - Quantizing transformer_blocks.2.norm1_context.linear.weight +25-08-31 12:38:05 | D | + quant_dtype: sint4 +25-08-31 12:38:05 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:05 | D | + scale_dtype: (None,) +25-08-31 12:38:05 | D | - Quantizing transformer_blocks.2.attn.to_q.weight +25-08-31 12:38:05 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:05 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:05 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:05 | D | - Quantizing transformer_blocks.2.attn.to_k.weight +25-08-31 12:38:05 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:05 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:05 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:05 | D | - Quantizing transformer_blocks.2.attn.to_v.weight +25-08-31 12:38:05 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:05 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:05 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:05 | D | - Quantizing transformer_blocks.2.attn.add_q_proj.weight +25-08-31 12:38:05 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:05 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:05 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:05 | D | - Quantizing transformer_blocks.2.attn.add_k_proj.weight +25-08-31 12:38:05 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:05 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:05 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:06 | D | - Quantizing transformer_blocks.2.attn.add_v_proj.weight +25-08-31 12:38:06 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:06 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:06 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:06 | D | - Quantizing transformer_blocks.2.attn.to_out.0.weight +25-08-31 12:38:06 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:06 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:06 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:06 | D | - Quantizing transformer_blocks.2.attn.to_add_out.weight +25-08-31 12:38:06 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:06 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:06 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:06 | D | - Quantizing transformer_blocks.2.ff.net.0.proj.weight +25-08-31 12:38:06 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:06 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:06 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:06 | D | - Quantizing transformer_blocks.2.ff.net.2.weight +25-08-31 12:38:06 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:06 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:06 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:07 | D | - Quantizing transformer_blocks.2.ff_context.net.0.proj.weight +25-08-31 12:38:07 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:07 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:07 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:07 | D | - Quantizing transformer_blocks.2.ff_context.net.2.weight +25-08-31 12:38:07 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:07 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:07 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:07 | D | - Quantizing weights: block transformer_blocks.3 +25-08-31 12:38:07 | D | - Quantizing transformer_blocks.3.norm1.linear.weight +25-08-31 12:38:07 | D | + quant_dtype: sint4 +25-08-31 12:38:07 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:07 | D | + scale_dtype: (None,) +25-08-31 12:38:07 | D | - Quantizing transformer_blocks.3.norm1_context.linear.weight +25-08-31 12:38:07 | D | + quant_dtype: sint4 +25-08-31 12:38:07 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:07 | D | + scale_dtype: (None,) +25-08-31 12:38:07 | D | - Quantizing transformer_blocks.3.attn.to_q.weight +25-08-31 12:38:07 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:07 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:07 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:08 | D | - Quantizing transformer_blocks.3.attn.to_k.weight +25-08-31 12:38:08 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:08 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:08 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:08 | D | - Quantizing transformer_blocks.3.attn.to_v.weight +25-08-31 12:38:08 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:08 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:08 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:08 | D | - Quantizing transformer_blocks.3.attn.add_q_proj.weight +25-08-31 12:38:08 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:08 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:08 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:08 | D | - Quantizing transformer_blocks.3.attn.add_k_proj.weight +25-08-31 12:38:08 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:08 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:08 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:08 | D | - Quantizing transformer_blocks.3.attn.add_v_proj.weight +25-08-31 12:38:08 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:08 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:08 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:09 | D | - Quantizing transformer_blocks.3.attn.to_out.0.weight +25-08-31 12:38:09 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:09 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:09 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:09 | D | - Quantizing transformer_blocks.3.attn.to_add_out.weight +25-08-31 12:38:09 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:09 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:09 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:09 | D | - Quantizing transformer_blocks.3.ff.net.0.proj.weight +25-08-31 12:38:09 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:09 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:09 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:09 | D | - Quantizing transformer_blocks.3.ff.net.2.weight +25-08-31 12:38:09 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:09 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:09 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:09 | D | - Quantizing transformer_blocks.3.ff_context.net.0.proj.weight +25-08-31 12:38:09 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:09 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:09 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:10 | D | - Quantizing transformer_blocks.3.ff_context.net.2.weight +25-08-31 12:38:10 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:10 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:10 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:10 | D | - Quantizing weights: block transformer_blocks.4 +25-08-31 12:38:10 | D | - Quantizing transformer_blocks.4.norm1.linear.weight +25-08-31 12:38:10 | D | + quant_dtype: sint4 +25-08-31 12:38:10 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:10 | D | + scale_dtype: (None,) +25-08-31 12:38:10 | D | - Quantizing transformer_blocks.4.norm1_context.linear.weight +25-08-31 12:38:10 | D | + quant_dtype: sint4 +25-08-31 12:38:10 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:10 | D | + scale_dtype: (None,) +25-08-31 12:38:10 | D | - Quantizing transformer_blocks.4.attn.to_q.weight +25-08-31 12:38:10 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:10 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:10 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:10 | D | - Quantizing transformer_blocks.4.attn.to_k.weight +25-08-31 12:38:10 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:10 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:10 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:11 | D | - Quantizing transformer_blocks.4.attn.to_v.weight +25-08-31 12:38:11 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:11 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:11 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:11 | D | - Quantizing transformer_blocks.4.attn.add_q_proj.weight +25-08-31 12:38:11 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:11 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:11 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:11 | D | - Quantizing transformer_blocks.4.attn.add_k_proj.weight +25-08-31 12:38:11 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:11 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:11 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:11 | D | - Quantizing transformer_blocks.4.attn.add_v_proj.weight +25-08-31 12:38:11 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:11 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:11 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:11 | D | - Quantizing transformer_blocks.4.attn.to_out.0.weight +25-08-31 12:38:11 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:11 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:11 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:11 | D | - Quantizing transformer_blocks.4.attn.to_add_out.weight +25-08-31 12:38:11 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:11 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:11 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:12 | D | - Quantizing transformer_blocks.4.ff.net.0.proj.weight +25-08-31 12:38:12 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:12 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:12 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:12 | D | - Quantizing transformer_blocks.4.ff.net.2.weight +25-08-31 12:38:12 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:12 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:12 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:12 | D | - Quantizing transformer_blocks.4.ff_context.net.0.proj.weight +25-08-31 12:38:12 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:12 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:12 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:12 | D | - Quantizing transformer_blocks.4.ff_context.net.2.weight +25-08-31 12:38:12 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:12 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:12 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:12 | D | - Quantizing weights: block transformer_blocks.5 +25-08-31 12:38:12 | D | - Quantizing transformer_blocks.5.norm1.linear.weight +25-08-31 12:38:12 | D | + quant_dtype: sint4 +25-08-31 12:38:12 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:12 | D | + scale_dtype: (None,) +25-08-31 12:38:13 | D | - Quantizing transformer_blocks.5.norm1_context.linear.weight +25-08-31 12:38:13 | D | + quant_dtype: sint4 +25-08-31 12:38:13 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:13 | D | + scale_dtype: (None,) +25-08-31 12:38:13 | D | - Quantizing transformer_blocks.5.attn.to_q.weight +25-08-31 12:38:13 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:13 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:13 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:13 | D | - Quantizing transformer_blocks.5.attn.to_k.weight +25-08-31 12:38:13 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:13 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:13 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:13 | D | - Quantizing transformer_blocks.5.attn.to_v.weight +25-08-31 12:38:13 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:13 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:13 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:13 | D | - Quantizing transformer_blocks.5.attn.add_q_proj.weight +25-08-31 12:38:13 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:13 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:13 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:14 | D | - Quantizing transformer_blocks.5.attn.add_k_proj.weight +25-08-31 12:38:14 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:14 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:14 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:14 | D | - Quantizing transformer_blocks.5.attn.add_v_proj.weight +25-08-31 12:38:14 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:14 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:14 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:14 | D | - Quantizing transformer_blocks.5.attn.to_out.0.weight +25-08-31 12:38:14 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:14 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:14 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:14 | D | - Quantizing transformer_blocks.5.attn.to_add_out.weight +25-08-31 12:38:14 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:14 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:14 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:14 | D | - Quantizing transformer_blocks.5.ff.net.0.proj.weight +25-08-31 12:38:14 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:14 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:14 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:15 | D | - Quantizing transformer_blocks.5.ff.net.2.weight +25-08-31 12:38:15 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:15 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:15 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:15 | D | - Quantizing transformer_blocks.5.ff_context.net.0.proj.weight +25-08-31 12:38:15 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:15 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:15 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:15 | D | - Quantizing transformer_blocks.5.ff_context.net.2.weight +25-08-31 12:38:15 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:15 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:15 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:15 | D | - Quantizing weights: block transformer_blocks.6 +25-08-31 12:38:15 | D | - Quantizing transformer_blocks.6.norm1.linear.weight +25-08-31 12:38:15 | D | + quant_dtype: sint4 +25-08-31 12:38:15 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:15 | D | + scale_dtype: (None,) +25-08-31 12:38:15 | D | - Quantizing transformer_blocks.6.norm1_context.linear.weight +25-08-31 12:38:15 | D | + quant_dtype: sint4 +25-08-31 12:38:15 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:15 | D | + scale_dtype: (None,) +25-08-31 12:38:16 | D | - Quantizing transformer_blocks.6.attn.to_q.weight +25-08-31 12:38:16 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:16 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:16 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:16 | D | - Quantizing transformer_blocks.6.attn.to_k.weight +25-08-31 12:38:16 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:16 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:16 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:16 | D | - Quantizing transformer_blocks.6.attn.to_v.weight +25-08-31 12:38:16 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:16 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:16 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:16 | D | - Quantizing transformer_blocks.6.attn.add_q_proj.weight +25-08-31 12:38:16 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:16 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:16 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:16 | D | - Quantizing transformer_blocks.6.attn.add_k_proj.weight +25-08-31 12:38:16 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:16 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:16 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:16 | D | - Quantizing transformer_blocks.6.attn.add_v_proj.weight +25-08-31 12:38:16 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:16 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:16 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:17 | D | - Quantizing transformer_blocks.6.attn.to_out.0.weight +25-08-31 12:38:17 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:17 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:17 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:17 | D | - Quantizing transformer_blocks.6.attn.to_add_out.weight +25-08-31 12:38:17 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:17 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:17 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:17 | D | - Quantizing transformer_blocks.6.ff.net.0.proj.weight +25-08-31 12:38:17 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:17 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:17 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:17 | D | - Quantizing transformer_blocks.6.ff.net.2.weight +25-08-31 12:38:17 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:17 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:17 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:17 | D | - Quantizing transformer_blocks.6.ff_context.net.0.proj.weight +25-08-31 12:38:17 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:17 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:17 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:18 | D | - Quantizing transformer_blocks.6.ff_context.net.2.weight +25-08-31 12:38:18 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:18 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:18 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:18 | D | - Quantizing weights: block transformer_blocks.7 +25-08-31 12:38:18 | D | - Quantizing transformer_blocks.7.norm1.linear.weight +25-08-31 12:38:18 | D | + quant_dtype: sint4 +25-08-31 12:38:18 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:18 | D | + scale_dtype: (None,) +25-08-31 12:38:18 | D | - Quantizing transformer_blocks.7.norm1_context.linear.weight +25-08-31 12:38:18 | D | + quant_dtype: sint4 +25-08-31 12:38:18 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:18 | D | + scale_dtype: (None,) +25-08-31 12:38:18 | D | - Quantizing transformer_blocks.7.attn.to_q.weight +25-08-31 12:38:18 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:18 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:18 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:18 | D | - Quantizing transformer_blocks.7.attn.to_k.weight +25-08-31 12:38:18 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:18 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:18 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:19 | D | - Quantizing transformer_blocks.7.attn.to_v.weight +25-08-31 12:38:19 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:19 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:19 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:19 | D | - Quantizing transformer_blocks.7.attn.add_q_proj.weight +25-08-31 12:38:19 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:19 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:19 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:19 | D | - Quantizing transformer_blocks.7.attn.add_k_proj.weight +25-08-31 12:38:19 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:19 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:19 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:19 | D | - Quantizing transformer_blocks.7.attn.add_v_proj.weight +25-08-31 12:38:19 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:19 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:19 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:19 | D | - Quantizing transformer_blocks.7.attn.to_out.0.weight +25-08-31 12:38:19 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:19 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:19 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:19 | D | - Quantizing transformer_blocks.7.attn.to_add_out.weight +25-08-31 12:38:19 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:19 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:19 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:20 | D | - Quantizing transformer_blocks.7.ff.net.0.proj.weight +25-08-31 12:38:20 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:20 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:20 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:20 | D | - Quantizing transformer_blocks.7.ff.net.2.weight +25-08-31 12:38:20 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:20 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:20 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:20 | D | - Quantizing transformer_blocks.7.ff_context.net.0.proj.weight +25-08-31 12:38:20 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:20 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:20 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:20 | D | - Quantizing transformer_blocks.7.ff_context.net.2.weight +25-08-31 12:38:20 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:20 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:20 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:20 | D | - Quantizing weights: block transformer_blocks.8 +25-08-31 12:38:20 | D | - Quantizing transformer_blocks.8.norm1.linear.weight +25-08-31 12:38:20 | D | + quant_dtype: sint4 +25-08-31 12:38:20 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:20 | D | + scale_dtype: (None,) +25-08-31 12:38:21 | D | - Quantizing transformer_blocks.8.norm1_context.linear.weight +25-08-31 12:38:21 | D | + quant_dtype: sint4 +25-08-31 12:38:21 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:21 | D | + scale_dtype: (None,) +25-08-31 12:38:21 | D | - Quantizing transformer_blocks.8.attn.to_q.weight +25-08-31 12:38:21 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:21 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:21 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:21 | D | - Quantizing transformer_blocks.8.attn.to_k.weight +25-08-31 12:38:21 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:21 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:21 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:21 | D | - Quantizing transformer_blocks.8.attn.to_v.weight +25-08-31 12:38:21 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:21 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:21 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:21 | D | - Quantizing transformer_blocks.8.attn.add_q_proj.weight +25-08-31 12:38:21 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:21 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:21 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:22 | D | - Quantizing transformer_blocks.8.attn.add_k_proj.weight +25-08-31 12:38:22 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:22 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:22 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:22 | D | - Quantizing transformer_blocks.8.attn.add_v_proj.weight +25-08-31 12:38:22 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:22 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:22 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:22 | D | - Quantizing transformer_blocks.8.attn.to_out.0.weight +25-08-31 12:38:22 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:22 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:22 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:22 | D | - Quantizing transformer_blocks.8.attn.to_add_out.weight +25-08-31 12:38:22 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:22 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:22 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:22 | D | - Quantizing transformer_blocks.8.ff.net.0.proj.weight +25-08-31 12:38:22 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:22 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:22 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:23 | D | - Quantizing transformer_blocks.8.ff.net.2.weight +25-08-31 12:38:23 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:23 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:23 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:23 | D | - Quantizing transformer_blocks.8.ff_context.net.0.proj.weight +25-08-31 12:38:23 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:23 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:23 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:23 | D | - Quantizing transformer_blocks.8.ff_context.net.2.weight +25-08-31 12:38:23 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:23 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:23 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:23 | D | - Quantizing weights: block transformer_blocks.9 +25-08-31 12:38:23 | D | - Quantizing transformer_blocks.9.norm1.linear.weight +25-08-31 12:38:23 | D | + quant_dtype: sint4 +25-08-31 12:38:23 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:23 | D | + scale_dtype: (None,) +25-08-31 12:38:23 | D | - Quantizing transformer_blocks.9.norm1_context.linear.weight +25-08-31 12:38:23 | D | + quant_dtype: sint4 +25-08-31 12:38:23 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:23 | D | + scale_dtype: (None,) +25-08-31 12:38:24 | D | - Quantizing transformer_blocks.9.attn.to_q.weight +25-08-31 12:38:24 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:24 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:24 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:24 | D | - Quantizing transformer_blocks.9.attn.to_k.weight +25-08-31 12:38:24 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:24 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:24 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:24 | D | - Quantizing transformer_blocks.9.attn.to_v.weight +25-08-31 12:38:24 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:24 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:24 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:24 | D | - Quantizing transformer_blocks.9.attn.add_q_proj.weight +25-08-31 12:38:24 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:24 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:24 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:24 | D | - Quantizing transformer_blocks.9.attn.add_k_proj.weight +25-08-31 12:38:24 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:24 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:24 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:24 | D | - Quantizing transformer_blocks.9.attn.add_v_proj.weight +25-08-31 12:38:24 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:24 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:24 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:25 | D | - Quantizing transformer_blocks.9.attn.to_out.0.weight +25-08-31 12:38:25 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:25 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:25 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:25 | D | - Quantizing transformer_blocks.9.attn.to_add_out.weight +25-08-31 12:38:25 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:25 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:25 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:25 | D | - Quantizing transformer_blocks.9.ff.net.0.proj.weight +25-08-31 12:38:25 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:25 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:25 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:25 | D | - Quantizing transformer_blocks.9.ff.net.2.weight +25-08-31 12:38:25 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:25 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:25 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:25 | D | - Quantizing transformer_blocks.9.ff_context.net.0.proj.weight +25-08-31 12:38:25 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:25 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:25 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:26 | D | - Quantizing transformer_blocks.9.ff_context.net.2.weight +25-08-31 12:38:26 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:26 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:26 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:26 | D | - Quantizing weights: block transformer_blocks.10 +25-08-31 12:38:26 | D | - Quantizing transformer_blocks.10.norm1.linear.weight +25-08-31 12:38:26 | D | + quant_dtype: sint4 +25-08-31 12:38:26 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:26 | D | + scale_dtype: (None,) +25-08-31 12:38:26 | D | - Quantizing transformer_blocks.10.norm1_context.linear.weight +25-08-31 12:38:26 | D | + quant_dtype: sint4 +25-08-31 12:38:26 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:26 | D | + scale_dtype: (None,) +25-08-31 12:38:26 | D | - Quantizing transformer_blocks.10.attn.to_q.weight +25-08-31 12:38:26 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:26 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:26 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:26 | D | - Quantizing transformer_blocks.10.attn.to_k.weight +25-08-31 12:38:26 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:26 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:26 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:27 | D | - Quantizing transformer_blocks.10.attn.to_v.weight +25-08-31 12:38:27 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:27 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:27 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:27 | D | - Quantizing transformer_blocks.10.attn.add_q_proj.weight +25-08-31 12:38:27 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:27 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:27 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:27 | D | - Quantizing transformer_blocks.10.attn.add_k_proj.weight +25-08-31 12:38:27 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:27 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:27 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:27 | D | - Quantizing transformer_blocks.10.attn.add_v_proj.weight +25-08-31 12:38:27 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:27 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:27 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:27 | D | - Quantizing transformer_blocks.10.attn.to_out.0.weight +25-08-31 12:38:27 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:27 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:27 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:28 | D | - Quantizing transformer_blocks.10.attn.to_add_out.weight +25-08-31 12:38:28 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:28 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:28 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:28 | D | - Quantizing transformer_blocks.10.ff.net.0.proj.weight +25-08-31 12:38:28 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:28 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:28 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:28 | D | - Quantizing transformer_blocks.10.ff.net.2.weight +25-08-31 12:38:28 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:28 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:28 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:28 | D | - Quantizing transformer_blocks.10.ff_context.net.0.proj.weight +25-08-31 12:38:28 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:28 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:28 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:28 | D | - Quantizing transformer_blocks.10.ff_context.net.2.weight +25-08-31 12:38:28 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:28 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:28 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:29 | D | - Quantizing weights: block transformer_blocks.11 +25-08-31 12:38:29 | D | - Quantizing transformer_blocks.11.norm1.linear.weight +25-08-31 12:38:29 | D | + quant_dtype: sint4 +25-08-31 12:38:29 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:29 | D | + scale_dtype: (None,) +25-08-31 12:38:29 | D | - Quantizing transformer_blocks.11.norm1_context.linear.weight +25-08-31 12:38:29 | D | + quant_dtype: sint4 +25-08-31 12:38:29 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:29 | D | + scale_dtype: (None,) +25-08-31 12:38:29 | D | - Quantizing transformer_blocks.11.attn.to_q.weight +25-08-31 12:38:29 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:29 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:29 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:29 | D | - Quantizing transformer_blocks.11.attn.to_k.weight +25-08-31 12:38:29 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:29 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:29 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:29 | D | - Quantizing transformer_blocks.11.attn.to_v.weight +25-08-31 12:38:29 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:29 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:29 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:30 | D | - Quantizing transformer_blocks.11.attn.add_q_proj.weight +25-08-31 12:38:30 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:30 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:30 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:30 | D | - Quantizing transformer_blocks.11.attn.add_k_proj.weight +25-08-31 12:38:30 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:30 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:30 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:30 | D | - Quantizing transformer_blocks.11.attn.add_v_proj.weight +25-08-31 12:38:30 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:30 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:30 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:30 | D | - Quantizing transformer_blocks.11.attn.to_out.0.weight +25-08-31 12:38:30 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:30 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:30 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:30 | D | - Quantizing transformer_blocks.11.attn.to_add_out.weight +25-08-31 12:38:30 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:30 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:30 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:31 | D | - Quantizing transformer_blocks.11.ff.net.0.proj.weight +25-08-31 12:38:31 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:31 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:31 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:31 | D | - Quantizing transformer_blocks.11.ff.net.2.weight +25-08-31 12:38:31 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:31 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:31 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:31 | D | - Quantizing transformer_blocks.11.ff_context.net.0.proj.weight +25-08-31 12:38:31 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:31 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:31 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:31 | D | - Quantizing transformer_blocks.11.ff_context.net.2.weight +25-08-31 12:38:31 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:31 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:31 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:31 | D | - Quantizing weights: block transformer_blocks.12 +25-08-31 12:38:31 | D | - Quantizing transformer_blocks.12.norm1.linear.weight +25-08-31 12:38:31 | D | + quant_dtype: sint4 +25-08-31 12:38:31 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:31 | D | + scale_dtype: (None,) +25-08-31 12:38:32 | D | - Quantizing transformer_blocks.12.norm1_context.linear.weight +25-08-31 12:38:32 | D | + quant_dtype: sint4 +25-08-31 12:38:32 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:32 | D | + scale_dtype: (None,) +25-08-31 12:38:32 | D | - Quantizing transformer_blocks.12.attn.to_q.weight +25-08-31 12:38:32 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:32 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:32 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:32 | D | - Quantizing transformer_blocks.12.attn.to_k.weight +25-08-31 12:38:32 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:32 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:32 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:32 | D | - Quantizing transformer_blocks.12.attn.to_v.weight +25-08-31 12:38:32 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:32 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:32 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:32 | D | - Quantizing transformer_blocks.12.attn.add_q_proj.weight +25-08-31 12:38:32 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:32 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:32 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:32 | D | - Quantizing transformer_blocks.12.attn.add_k_proj.weight +25-08-31 12:38:32 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:32 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:32 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:33 | D | - Quantizing transformer_blocks.12.attn.add_v_proj.weight +25-08-31 12:38:33 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:33 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:33 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:33 | D | - Quantizing transformer_blocks.12.attn.to_out.0.weight +25-08-31 12:38:33 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:33 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:33 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:33 | D | - Quantizing transformer_blocks.12.attn.to_add_out.weight +25-08-31 12:38:33 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:33 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:33 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:33 | D | - Quantizing transformer_blocks.12.ff.net.0.proj.weight +25-08-31 12:38:33 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:33 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:33 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:33 | D | - Quantizing transformer_blocks.12.ff.net.2.weight +25-08-31 12:38:33 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:33 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:33 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:34 | D | - Quantizing transformer_blocks.12.ff_context.net.0.proj.weight +25-08-31 12:38:34 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:34 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:34 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:34 | D | - Quantizing transformer_blocks.12.ff_context.net.2.weight +25-08-31 12:38:34 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:34 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:34 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:34 | D | - Quantizing weights: block transformer_blocks.13 +25-08-31 12:38:34 | D | - Quantizing transformer_blocks.13.norm1.linear.weight +25-08-31 12:38:34 | D | + quant_dtype: sint4 +25-08-31 12:38:34 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:34 | D | + scale_dtype: (None,) +25-08-31 12:38:34 | D | - Quantizing transformer_blocks.13.norm1_context.linear.weight +25-08-31 12:38:34 | D | + quant_dtype: sint4 +25-08-31 12:38:34 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:34 | D | + scale_dtype: (None,) +25-08-31 12:38:34 | D | - Quantizing transformer_blocks.13.attn.to_q.weight +25-08-31 12:38:34 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:34 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:34 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:35 | D | - Quantizing transformer_blocks.13.attn.to_k.weight +25-08-31 12:38:35 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:35 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:35 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:35 | D | - Quantizing transformer_blocks.13.attn.to_v.weight +25-08-31 12:38:35 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:35 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:35 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:35 | D | - Quantizing transformer_blocks.13.attn.add_q_proj.weight +25-08-31 12:38:35 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:35 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:35 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:35 | D | - Quantizing transformer_blocks.13.attn.add_k_proj.weight +25-08-31 12:38:35 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:35 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:35 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:35 | D | - Quantizing transformer_blocks.13.attn.add_v_proj.weight +25-08-31 12:38:35 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:35 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:35 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:36 | D | - Quantizing transformer_blocks.13.attn.to_out.0.weight +25-08-31 12:38:36 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:36 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:36 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:36 | D | - Quantizing transformer_blocks.13.attn.to_add_out.weight +25-08-31 12:38:36 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:36 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:36 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:36 | D | - Quantizing transformer_blocks.13.ff.net.0.proj.weight +25-08-31 12:38:36 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:36 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:36 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:36 | D | - Quantizing transformer_blocks.13.ff.net.2.weight +25-08-31 12:38:36 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:36 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:36 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:36 | D | - Quantizing transformer_blocks.13.ff_context.net.0.proj.weight +25-08-31 12:38:36 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:36 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:36 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:37 | D | - Quantizing transformer_blocks.13.ff_context.net.2.weight +25-08-31 12:38:37 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:37 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:37 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:37 | D | - Quantizing weights: block transformer_blocks.14 +25-08-31 12:38:37 | D | - Quantizing transformer_blocks.14.norm1.linear.weight +25-08-31 12:38:37 | D | + quant_dtype: sint4 +25-08-31 12:38:37 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:37 | D | + scale_dtype: (None,) +25-08-31 12:38:37 | D | - Quantizing transformer_blocks.14.norm1_context.linear.weight +25-08-31 12:38:37 | D | + quant_dtype: sint4 +25-08-31 12:38:37 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:37 | D | + scale_dtype: (None,) +25-08-31 12:38:37 | D | - Quantizing transformer_blocks.14.attn.to_q.weight +25-08-31 12:38:37 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:37 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:37 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:37 | D | - Quantizing transformer_blocks.14.attn.to_k.weight +25-08-31 12:38:37 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:37 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:37 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:38 | D | - Quantizing transformer_blocks.14.attn.to_v.weight +25-08-31 12:38:38 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:38 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:38 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:38 | D | - Quantizing transformer_blocks.14.attn.add_q_proj.weight +25-08-31 12:38:38 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:38 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:38 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:38 | D | - Quantizing transformer_blocks.14.attn.add_k_proj.weight +25-08-31 12:38:38 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:38 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:38 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:38 | D | - Quantizing transformer_blocks.14.attn.add_v_proj.weight +25-08-31 12:38:38 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:38 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:38 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:38 | D | - Quantizing transformer_blocks.14.attn.to_out.0.weight +25-08-31 12:38:38 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:38 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:38 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:38 | D | - Quantizing transformer_blocks.14.attn.to_add_out.weight +25-08-31 12:38:38 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:38 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:38 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:39 | D | - Quantizing transformer_blocks.14.ff.net.0.proj.weight +25-08-31 12:38:39 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:39 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:39 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:39 | D | - Quantizing transformer_blocks.14.ff.net.2.weight +25-08-31 12:38:39 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:39 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:39 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:39 | D | - Quantizing transformer_blocks.14.ff_context.net.0.proj.weight +25-08-31 12:38:39 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:39 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:39 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:39 | D | - Quantizing transformer_blocks.14.ff_context.net.2.weight +25-08-31 12:38:39 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:39 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:39 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:39 | D | - Quantizing weights: block transformer_blocks.15 +25-08-31 12:38:39 | D | - Quantizing transformer_blocks.15.norm1.linear.weight +25-08-31 12:38:39 | D | + quant_dtype: sint4 +25-08-31 12:38:39 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:39 | D | + scale_dtype: (None,) +25-08-31 12:38:40 | D | - Quantizing transformer_blocks.15.norm1_context.linear.weight +25-08-31 12:38:40 | D | + quant_dtype: sint4 +25-08-31 12:38:40 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:40 | D | + scale_dtype: (None,) +25-08-31 12:38:40 | D | - Quantizing transformer_blocks.15.attn.to_q.weight +25-08-31 12:38:40 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:40 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:40 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:40 | D | - Quantizing transformer_blocks.15.attn.to_k.weight +25-08-31 12:38:40 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:40 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:40 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:40 | D | - Quantizing transformer_blocks.15.attn.to_v.weight +25-08-31 12:38:40 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:40 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:40 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:40 | D | - Quantizing transformer_blocks.15.attn.add_q_proj.weight +25-08-31 12:38:40 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:40 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:40 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:41 | D | - Quantizing transformer_blocks.15.attn.add_k_proj.weight +25-08-31 12:38:41 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:41 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:41 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:41 | D | - Quantizing transformer_blocks.15.attn.add_v_proj.weight +25-08-31 12:38:41 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:41 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:41 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:41 | D | - Quantizing transformer_blocks.15.attn.to_out.0.weight +25-08-31 12:38:41 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:41 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:41 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:41 | D | - Quantizing transformer_blocks.15.attn.to_add_out.weight +25-08-31 12:38:41 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:41 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:41 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:41 | D | - Quantizing transformer_blocks.15.ff.net.0.proj.weight +25-08-31 12:38:41 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:41 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:41 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:42 | D | - Quantizing transformer_blocks.15.ff.net.2.weight +25-08-31 12:38:42 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:42 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:42 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:42 | D | - Quantizing transformer_blocks.15.ff_context.net.0.proj.weight +25-08-31 12:38:42 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:42 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:42 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:42 | D | - Quantizing transformer_blocks.15.ff_context.net.2.weight +25-08-31 12:38:42 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:42 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:42 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:42 | D | - Quantizing weights: block transformer_blocks.16 +25-08-31 12:38:42 | D | - Quantizing transformer_blocks.16.norm1.linear.weight +25-08-31 12:38:42 | D | + quant_dtype: sint4 +25-08-31 12:38:42 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:42 | D | + scale_dtype: (None,) +25-08-31 12:38:42 | D | - Quantizing transformer_blocks.16.norm1_context.linear.weight +25-08-31 12:38:42 | D | + quant_dtype: sint4 +25-08-31 12:38:42 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:42 | D | + scale_dtype: (None,) +25-08-31 12:38:43 | D | - Quantizing transformer_blocks.16.attn.to_q.weight +25-08-31 12:38:43 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:43 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:43 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:43 | D | - Quantizing transformer_blocks.16.attn.to_k.weight +25-08-31 12:38:43 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:43 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:43 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:43 | D | - Quantizing transformer_blocks.16.attn.to_v.weight +25-08-31 12:38:43 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:43 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:43 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:43 | D | - Quantizing transformer_blocks.16.attn.add_q_proj.weight +25-08-31 12:38:43 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:43 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:43 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:43 | D | - Quantizing transformer_blocks.16.attn.add_k_proj.weight +25-08-31 12:38:43 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:43 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:43 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:44 | D | - Quantizing transformer_blocks.16.attn.add_v_proj.weight +25-08-31 12:38:44 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:44 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:44 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:44 | D | - Quantizing transformer_blocks.16.attn.to_out.0.weight +25-08-31 12:38:44 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:44 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:44 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:44 | D | - Quantizing transformer_blocks.16.attn.to_add_out.weight +25-08-31 12:38:44 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:44 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:44 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:44 | D | - Quantizing transformer_blocks.16.ff.net.0.proj.weight +25-08-31 12:38:44 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:44 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:44 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:44 | D | - Quantizing transformer_blocks.16.ff.net.2.weight +25-08-31 12:38:44 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:44 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:44 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:44 | D | - Quantizing transformer_blocks.16.ff_context.net.0.proj.weight +25-08-31 12:38:44 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:44 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:44 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:45 | D | - Quantizing transformer_blocks.16.ff_context.net.2.weight +25-08-31 12:38:45 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:45 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:45 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:45 | D | - Quantizing weights: block transformer_blocks.17 +25-08-31 12:38:45 | D | - Quantizing transformer_blocks.17.norm1.linear.weight +25-08-31 12:38:45 | D | + quant_dtype: sint4 +25-08-31 12:38:45 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:45 | D | + scale_dtype: (None,) +25-08-31 12:38:45 | D | - Quantizing transformer_blocks.17.norm1_context.linear.weight +25-08-31 12:38:45 | D | + quant_dtype: sint4 +25-08-31 12:38:45 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:45 | D | + scale_dtype: (None,) +25-08-31 12:38:45 | D | - Quantizing transformer_blocks.17.attn.to_q.weight +25-08-31 12:38:45 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:45 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:45 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:45 | D | - Quantizing transformer_blocks.17.attn.to_k.weight +25-08-31 12:38:45 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:45 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:45 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:46 | D | - Quantizing transformer_blocks.17.attn.to_v.weight +25-08-31 12:38:46 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:46 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:46 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:46 | D | - Quantizing transformer_blocks.17.attn.add_q_proj.weight +25-08-31 12:38:46 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:46 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:46 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:46 | D | - Quantizing transformer_blocks.17.attn.add_k_proj.weight +25-08-31 12:38:46 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:46 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:46 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:46 | D | - Quantizing transformer_blocks.17.attn.add_v_proj.weight +25-08-31 12:38:46 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:46 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:46 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:46 | D | - Quantizing transformer_blocks.17.attn.to_out.0.weight +25-08-31 12:38:46 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:46 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:46 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:47 | D | - Quantizing transformer_blocks.17.attn.to_add_out.weight +25-08-31 12:38:47 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:47 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:47 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:47 | D | - Quantizing transformer_blocks.17.ff.net.0.proj.weight +25-08-31 12:38:47 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:47 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:47 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:47 | D | - Quantizing transformer_blocks.17.ff.net.2.weight +25-08-31 12:38:47 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:47 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:47 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:47 | D | - Quantizing transformer_blocks.17.ff_context.net.0.proj.weight +25-08-31 12:38:47 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:47 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:47 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:47 | D | - Quantizing transformer_blocks.17.ff_context.net.2.weight +25-08-31 12:38:47 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:47 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:47 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:48 | D | - Quantizing weights: block transformer_blocks.18 +25-08-31 12:38:48 | D | - Quantizing transformer_blocks.18.norm1.linear.weight +25-08-31 12:38:48 | D | + quant_dtype: sint4 +25-08-31 12:38:48 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:48 | D | + scale_dtype: (None,) +25-08-31 12:38:48 | D | - Quantizing transformer_blocks.18.norm1_context.linear.weight +25-08-31 12:38:48 | D | + quant_dtype: sint4 +25-08-31 12:38:48 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:48 | D | + scale_dtype: (None,) +25-08-31 12:38:48 | D | - Quantizing transformer_blocks.18.attn.to_q.weight +25-08-31 12:38:48 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:48 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:48 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:48 | D | - Quantizing transformer_blocks.18.attn.to_k.weight +25-08-31 12:38:48 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:48 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:48 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:48 | D | - Quantizing transformer_blocks.18.attn.to_v.weight +25-08-31 12:38:48 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:48 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:48 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:49 | D | - Quantizing transformer_blocks.18.attn.add_q_proj.weight +25-08-31 12:38:49 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:49 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:49 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:49 | D | - Quantizing transformer_blocks.18.attn.add_k_proj.weight +25-08-31 12:38:49 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:49 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:49 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:49 | D | - Quantizing transformer_blocks.18.attn.add_v_proj.weight +25-08-31 12:38:49 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:49 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:49 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:49 | D | - Quantizing transformer_blocks.18.attn.to_out.0.weight +25-08-31 12:38:49 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:49 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:49 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:49 | D | - Quantizing transformer_blocks.18.attn.to_add_out.weight +25-08-31 12:38:49 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:49 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:49 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:50 | D | - Quantizing transformer_blocks.18.ff.net.0.proj.weight +25-08-31 12:38:50 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:50 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:50 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:50 | D | - Quantizing transformer_blocks.18.ff.net.2.weight +25-08-31 12:38:50 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:50 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:50 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:50 | D | - Quantizing transformer_blocks.18.ff_context.net.0.proj.weight +25-08-31 12:38:50 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:50 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:50 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:50 | D | - Quantizing transformer_blocks.18.ff_context.net.2.weight +25-08-31 12:38:50 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:50 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:50 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:50 | D | - Quantizing weights: block single_transformer_blocks.0 +25-08-31 12:38:50 | D | - Quantizing single_transformer_blocks.0.norm.linear.weight +25-08-31 12:38:50 | D | + quant_dtype: sint4 +25-08-31 12:38:50 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:50 | D | + scale_dtype: (None,) +25-08-31 12:38:50 | D | - Quantizing single_transformer_blocks.0.attn.to_q.weight +25-08-31 12:38:50 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:50 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:50 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:51 | D | - Quantizing single_transformer_blocks.0.attn.to_k.weight +25-08-31 12:38:51 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:51 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:51 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:51 | D | - Quantizing single_transformer_blocks.0.attn.to_v.weight +25-08-31 12:38:51 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:51 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:51 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:51 | D | - Quantizing single_transformer_blocks.0.proj_out.linears.0.weight +25-08-31 12:38:51 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:51 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:51 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:51 | D | - Quantizing single_transformer_blocks.0.proj_mlp.weight +25-08-31 12:38:51 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:51 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:51 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:51 | D | - Quantizing single_transformer_blocks.0.proj_out.linears.1.weight +25-08-31 12:38:51 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:51 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:51 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:52 | D | - Quantizing weights: block single_transformer_blocks.1 +25-08-31 12:38:52 | D | - Quantizing single_transformer_blocks.1.norm.linear.weight +25-08-31 12:38:52 | D | + quant_dtype: sint4 +25-08-31 12:38:52 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:52 | D | + scale_dtype: (None,) +25-08-31 12:38:52 | D | - Quantizing single_transformer_blocks.1.attn.to_q.weight +25-08-31 12:38:52 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:52 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:52 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:52 | D | - Quantizing single_transformer_blocks.1.attn.to_k.weight +25-08-31 12:38:52 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:52 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:52 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:52 | D | - Quantizing single_transformer_blocks.1.attn.to_v.weight +25-08-31 12:38:52 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:52 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:52 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:52 | D | - Quantizing single_transformer_blocks.1.proj_out.linears.0.weight +25-08-31 12:38:52 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:52 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:52 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:53 | D | - Quantizing single_transformer_blocks.1.proj_mlp.weight +25-08-31 12:38:53 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:53 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:53 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:53 | D | - Quantizing single_transformer_blocks.1.proj_out.linears.1.weight +25-08-31 12:38:53 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:53 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:53 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:53 | D | - Quantizing weights: block single_transformer_blocks.2 +25-08-31 12:38:53 | D | - Quantizing single_transformer_blocks.2.norm.linear.weight +25-08-31 12:38:53 | D | + quant_dtype: sint4 +25-08-31 12:38:53 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:53 | D | + scale_dtype: (None,) +25-08-31 12:38:53 | D | - Quantizing single_transformer_blocks.2.attn.to_q.weight +25-08-31 12:38:53 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:53 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:53 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:53 | D | - Quantizing single_transformer_blocks.2.attn.to_k.weight +25-08-31 12:38:53 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:53 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:53 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:54 | D | - Quantizing single_transformer_blocks.2.attn.to_v.weight +25-08-31 12:38:54 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:54 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:54 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:54 | D | - Quantizing single_transformer_blocks.2.proj_out.linears.0.weight +25-08-31 12:38:54 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:54 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:54 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:54 | D | - Quantizing single_transformer_blocks.2.proj_mlp.weight +25-08-31 12:38:54 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:54 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:54 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:54 | D | - Quantizing single_transformer_blocks.2.proj_out.linears.1.weight +25-08-31 12:38:54 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:54 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:54 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:54 | D | - Quantizing weights: block single_transformer_blocks.3 +25-08-31 12:38:54 | D | - Quantizing single_transformer_blocks.3.norm.linear.weight +25-08-31 12:38:54 | D | + quant_dtype: sint4 +25-08-31 12:38:54 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:54 | D | + scale_dtype: (None,) +25-08-31 12:38:54 | D | - Quantizing single_transformer_blocks.3.attn.to_q.weight +25-08-31 12:38:54 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:54 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:54 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:55 | D | - Quantizing single_transformer_blocks.3.attn.to_k.weight +25-08-31 12:38:55 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:55 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:55 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:55 | D | - Quantizing single_transformer_blocks.3.attn.to_v.weight +25-08-31 12:38:55 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:55 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:55 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:55 | D | - Quantizing single_transformer_blocks.3.proj_out.linears.0.weight +25-08-31 12:38:55 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:55 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:55 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:55 | D | - Quantizing single_transformer_blocks.3.proj_mlp.weight +25-08-31 12:38:55 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:55 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:55 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:55 | D | - Quantizing single_transformer_blocks.3.proj_out.linears.1.weight +25-08-31 12:38:55 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:55 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:55 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:56 | D | - Quantizing weights: block single_transformer_blocks.4 +25-08-31 12:38:56 | D | - Quantizing single_transformer_blocks.4.norm.linear.weight +25-08-31 12:38:56 | D | + quant_dtype: sint4 +25-08-31 12:38:56 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:56 | D | + scale_dtype: (None,) +25-08-31 12:38:56 | D | - Quantizing single_transformer_blocks.4.attn.to_q.weight +25-08-31 12:38:56 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:56 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:56 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:56 | D | - Quantizing single_transformer_blocks.4.attn.to_k.weight +25-08-31 12:38:56 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:56 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:56 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:56 | D | - Quantizing single_transformer_blocks.4.attn.to_v.weight +25-08-31 12:38:56 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:56 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:56 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:56 | D | - Quantizing single_transformer_blocks.4.proj_out.linears.0.weight +25-08-31 12:38:56 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:56 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:56 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:57 | D | - Quantizing single_transformer_blocks.4.proj_mlp.weight +25-08-31 12:38:57 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:57 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:57 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:57 | D | - Quantizing single_transformer_blocks.4.proj_out.linears.1.weight +25-08-31 12:38:57 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:57 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:57 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:57 | D | - Quantizing weights: block single_transformer_blocks.5 +25-08-31 12:38:57 | D | - Quantizing single_transformer_blocks.5.norm.linear.weight +25-08-31 12:38:57 | D | + quant_dtype: sint4 +25-08-31 12:38:57 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:57 | D | + scale_dtype: (None,) +25-08-31 12:38:57 | D | - Quantizing single_transformer_blocks.5.attn.to_q.weight +25-08-31 12:38:57 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:57 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:57 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:57 | D | - Quantizing single_transformer_blocks.5.attn.to_k.weight +25-08-31 12:38:57 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:57 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:57 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:58 | D | - Quantizing single_transformer_blocks.5.attn.to_v.weight +25-08-31 12:38:58 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:58 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:58 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:58 | D | - Quantizing single_transformer_blocks.5.proj_out.linears.0.weight +25-08-31 12:38:58 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:58 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:58 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:58 | D | - Quantizing single_transformer_blocks.5.proj_mlp.weight +25-08-31 12:38:58 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:58 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:58 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:58 | D | - Quantizing single_transformer_blocks.5.proj_out.linears.1.weight +25-08-31 12:38:58 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:58 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:58 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:58 | D | - Quantizing weights: block single_transformer_blocks.6 +25-08-31 12:38:58 | D | - Quantizing single_transformer_blocks.6.norm.linear.weight +25-08-31 12:38:58 | D | + quant_dtype: sint4 +25-08-31 12:38:58 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:38:58 | D | + scale_dtype: (None,) +25-08-31 12:38:59 | D | - Quantizing single_transformer_blocks.6.attn.to_q.weight +25-08-31 12:38:59 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:59 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:59 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:59 | D | - Quantizing single_transformer_blocks.6.attn.to_k.weight +25-08-31 12:38:59 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:59 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:59 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:59 | D | - Quantizing single_transformer_blocks.6.attn.to_v.weight +25-08-31 12:38:59 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:59 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:59 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:59 | D | - Quantizing single_transformer_blocks.6.proj_out.linears.0.weight +25-08-31 12:38:59 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:59 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:59 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:38:59 | D | - Quantizing single_transformer_blocks.6.proj_mlp.weight +25-08-31 12:38:59 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:38:59 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:38:59 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:00 | D | - Quantizing single_transformer_blocks.6.proj_out.linears.1.weight +25-08-31 12:39:00 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:00 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:00 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:00 | D | - Quantizing weights: block single_transformer_blocks.7 +25-08-31 12:39:00 | D | - Quantizing single_transformer_blocks.7.norm.linear.weight +25-08-31 12:39:00 | D | + quant_dtype: sint4 +25-08-31 12:39:00 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:00 | D | + scale_dtype: (None,) +25-08-31 12:39:00 | D | - Quantizing single_transformer_blocks.7.attn.to_q.weight +25-08-31 12:39:00 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:00 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:00 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:00 | D | - Quantizing single_transformer_blocks.7.attn.to_k.weight +25-08-31 12:39:00 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:00 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:00 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:00 | D | - Quantizing single_transformer_blocks.7.attn.to_v.weight +25-08-31 12:39:00 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:00 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:00 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:01 | D | - Quantizing single_transformer_blocks.7.proj_out.linears.0.weight +25-08-31 12:39:01 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:01 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:01 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:01 | D | - Quantizing single_transformer_blocks.7.proj_mlp.weight +25-08-31 12:39:01 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:01 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:01 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:01 | D | - Quantizing single_transformer_blocks.7.proj_out.linears.1.weight +25-08-31 12:39:01 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:01 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:01 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:01 | D | - Quantizing weights: block single_transformer_blocks.8 +25-08-31 12:39:01 | D | - Quantizing single_transformer_blocks.8.norm.linear.weight +25-08-31 12:39:01 | D | + quant_dtype: sint4 +25-08-31 12:39:01 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:01 | D | + scale_dtype: (None,) +25-08-31 12:39:01 | D | - Quantizing single_transformer_blocks.8.attn.to_q.weight +25-08-31 12:39:01 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:01 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:01 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:02 | D | - Quantizing single_transformer_blocks.8.attn.to_k.weight +25-08-31 12:39:02 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:02 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:02 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:02 | D | - Quantizing single_transformer_blocks.8.attn.to_v.weight +25-08-31 12:39:02 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:02 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:02 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:02 | D | - Quantizing single_transformer_blocks.8.proj_out.linears.0.weight +25-08-31 12:39:02 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:02 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:02 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:02 | D | - Quantizing single_transformer_blocks.8.proj_mlp.weight +25-08-31 12:39:02 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:02 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:02 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:02 | D | - Quantizing single_transformer_blocks.8.proj_out.linears.1.weight +25-08-31 12:39:02 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:02 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:02 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:02 | D | - Quantizing weights: block single_transformer_blocks.9 +25-08-31 12:39:02 | D | - Quantizing single_transformer_blocks.9.norm.linear.weight +25-08-31 12:39:02 | D | + quant_dtype: sint4 +25-08-31 12:39:02 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:02 | D | + scale_dtype: (None,) +25-08-31 12:39:03 | D | - Quantizing single_transformer_blocks.9.attn.to_q.weight +25-08-31 12:39:03 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:03 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:03 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:03 | D | - Quantizing single_transformer_blocks.9.attn.to_k.weight +25-08-31 12:39:03 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:03 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:03 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:03 | D | - Quantizing single_transformer_blocks.9.attn.to_v.weight +25-08-31 12:39:03 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:03 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:03 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:03 | D | - Quantizing single_transformer_blocks.9.proj_out.linears.0.weight +25-08-31 12:39:03 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:03 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:03 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:03 | D | - Quantizing single_transformer_blocks.9.proj_mlp.weight +25-08-31 12:39:03 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:03 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:03 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:04 | D | - Quantizing single_transformer_blocks.9.proj_out.linears.1.weight +25-08-31 12:39:04 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:04 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:04 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:04 | D | - Quantizing weights: block single_transformer_blocks.10 +25-08-31 12:39:04 | D | - Quantizing single_transformer_blocks.10.norm.linear.weight +25-08-31 12:39:04 | D | + quant_dtype: sint4 +25-08-31 12:39:04 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:04 | D | + scale_dtype: (None,) +25-08-31 12:39:04 | D | - Quantizing single_transformer_blocks.10.attn.to_q.weight +25-08-31 12:39:04 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:04 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:04 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:04 | D | - Quantizing single_transformer_blocks.10.attn.to_k.weight +25-08-31 12:39:04 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:04 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:04 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:04 | D | - Quantizing single_transformer_blocks.10.attn.to_v.weight +25-08-31 12:39:04 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:04 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:04 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:05 | D | - Quantizing single_transformer_blocks.10.proj_out.linears.0.weight +25-08-31 12:39:05 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:05 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:05 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:05 | D | - Quantizing single_transformer_blocks.10.proj_mlp.weight +25-08-31 12:39:05 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:05 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:05 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:05 | D | - Quantizing single_transformer_blocks.10.proj_out.linears.1.weight +25-08-31 12:39:05 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:05 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:05 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:05 | D | - Quantizing weights: block single_transformer_blocks.11 +25-08-31 12:39:05 | D | - Quantizing single_transformer_blocks.11.norm.linear.weight +25-08-31 12:39:05 | D | + quant_dtype: sint4 +25-08-31 12:39:05 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:05 | D | + scale_dtype: (None,) +25-08-31 12:39:05 | D | - Quantizing single_transformer_blocks.11.attn.to_q.weight +25-08-31 12:39:05 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:05 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:05 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:06 | D | - Quantizing single_transformer_blocks.11.attn.to_k.weight +25-08-31 12:39:06 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:06 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:06 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:06 | D | - Quantizing single_transformer_blocks.11.attn.to_v.weight +25-08-31 12:39:06 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:06 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:06 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:06 | D | - Quantizing single_transformer_blocks.11.proj_out.linears.0.weight +25-08-31 12:39:06 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:06 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:06 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:06 | D | - Quantizing single_transformer_blocks.11.proj_mlp.weight +25-08-31 12:39:06 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:06 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:06 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:06 | D | - Quantizing single_transformer_blocks.11.proj_out.linears.1.weight +25-08-31 12:39:06 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:06 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:06 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:07 | D | - Quantizing weights: block single_transformer_blocks.12 +25-08-31 12:39:07 | D | - Quantizing single_transformer_blocks.12.norm.linear.weight +25-08-31 12:39:07 | D | + quant_dtype: sint4 +25-08-31 12:39:07 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:07 | D | + scale_dtype: (None,) +25-08-31 12:39:07 | D | - Quantizing single_transformer_blocks.12.attn.to_q.weight +25-08-31 12:39:07 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:07 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:07 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:07 | D | - Quantizing single_transformer_blocks.12.attn.to_k.weight +25-08-31 12:39:07 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:07 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:07 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:07 | D | - Quantizing single_transformer_blocks.12.attn.to_v.weight +25-08-31 12:39:07 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:07 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:07 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:07 | D | - Quantizing single_transformer_blocks.12.proj_out.linears.0.weight +25-08-31 12:39:07 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:07 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:07 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:08 | D | - Quantizing single_transformer_blocks.12.proj_mlp.weight +25-08-31 12:39:08 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:08 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:08 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:08 | D | - Quantizing single_transformer_blocks.12.proj_out.linears.1.weight +25-08-31 12:39:08 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:08 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:08 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:08 | D | - Quantizing weights: block single_transformer_blocks.13 +25-08-31 12:39:08 | D | - Quantizing single_transformer_blocks.13.norm.linear.weight +25-08-31 12:39:08 | D | + quant_dtype: sint4 +25-08-31 12:39:08 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:08 | D | + scale_dtype: (None,) +25-08-31 12:39:08 | D | - Quantizing single_transformer_blocks.13.attn.to_q.weight +25-08-31 12:39:08 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:08 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:08 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:08 | D | - Quantizing single_transformer_blocks.13.attn.to_k.weight +25-08-31 12:39:08 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:08 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:08 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:08 | D | - Quantizing single_transformer_blocks.13.attn.to_v.weight +25-08-31 12:39:08 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:08 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:08 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:09 | D | - Quantizing single_transformer_blocks.13.proj_out.linears.0.weight +25-08-31 12:39:09 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:09 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:09 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:09 | D | - Quantizing single_transformer_blocks.13.proj_mlp.weight +25-08-31 12:39:09 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:09 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:09 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:09 | D | - Quantizing single_transformer_blocks.13.proj_out.linears.1.weight +25-08-31 12:39:09 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:09 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:09 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:09 | D | - Quantizing weights: block single_transformer_blocks.14 +25-08-31 12:39:09 | D | - Quantizing single_transformer_blocks.14.norm.linear.weight +25-08-31 12:39:09 | D | + quant_dtype: sint4 +25-08-31 12:39:09 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:09 | D | + scale_dtype: (None,) +25-08-31 12:39:09 | D | - Quantizing single_transformer_blocks.14.attn.to_q.weight +25-08-31 12:39:09 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:09 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:09 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:10 | D | - Quantizing single_transformer_blocks.14.attn.to_k.weight +25-08-31 12:39:10 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:10 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:10 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:10 | D | - Quantizing single_transformer_blocks.14.attn.to_v.weight +25-08-31 12:39:10 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:10 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:10 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:10 | D | - Quantizing single_transformer_blocks.14.proj_out.linears.0.weight +25-08-31 12:39:10 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:10 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:10 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:10 | D | - Quantizing single_transformer_blocks.14.proj_mlp.weight +25-08-31 12:39:10 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:10 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:10 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:10 | D | - Quantizing single_transformer_blocks.14.proj_out.linears.1.weight +25-08-31 12:39:10 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:10 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:10 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:11 | D | - Quantizing weights: block single_transformer_blocks.15 +25-08-31 12:39:11 | D | - Quantizing single_transformer_blocks.15.norm.linear.weight +25-08-31 12:39:11 | D | + quant_dtype: sint4 +25-08-31 12:39:11 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:11 | D | + scale_dtype: (None,) +25-08-31 12:39:11 | D | - Quantizing single_transformer_blocks.15.attn.to_q.weight +25-08-31 12:39:11 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:11 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:11 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:11 | D | - Quantizing single_transformer_blocks.15.attn.to_k.weight +25-08-31 12:39:11 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:11 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:11 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:11 | D | - Quantizing single_transformer_blocks.15.attn.to_v.weight +25-08-31 12:39:11 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:11 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:11 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:11 | D | - Quantizing single_transformer_blocks.15.proj_out.linears.0.weight +25-08-31 12:39:11 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:11 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:11 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:12 | D | - Quantizing single_transformer_blocks.15.proj_mlp.weight +25-08-31 12:39:12 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:12 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:12 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:12 | D | - Quantizing single_transformer_blocks.15.proj_out.linears.1.weight +25-08-31 12:39:12 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:12 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:12 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:12 | D | - Quantizing weights: block single_transformer_blocks.16 +25-08-31 12:39:12 | D | - Quantizing single_transformer_blocks.16.norm.linear.weight +25-08-31 12:39:12 | D | + quant_dtype: sint4 +25-08-31 12:39:12 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:12 | D | + scale_dtype: (None,) +25-08-31 12:39:12 | D | - Quantizing single_transformer_blocks.16.attn.to_q.weight +25-08-31 12:39:12 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:12 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:12 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:12 | D | - Quantizing single_transformer_blocks.16.attn.to_k.weight +25-08-31 12:39:12 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:12 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:12 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:13 | D | - Quantizing single_transformer_blocks.16.attn.to_v.weight +25-08-31 12:39:13 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:13 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:13 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:13 | D | - Quantizing single_transformer_blocks.16.proj_out.linears.0.weight +25-08-31 12:39:13 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:13 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:13 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:13 | D | - Quantizing single_transformer_blocks.16.proj_mlp.weight +25-08-31 12:39:13 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:13 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:13 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:13 | D | - Quantizing single_transformer_blocks.16.proj_out.linears.1.weight +25-08-31 12:39:13 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:13 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:13 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:13 | D | - Quantizing weights: block single_transformer_blocks.17 +25-08-31 12:39:13 | D | - Quantizing single_transformer_blocks.17.norm.linear.weight +25-08-31 12:39:13 | D | + quant_dtype: sint4 +25-08-31 12:39:13 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:13 | D | + scale_dtype: (None,) +25-08-31 12:39:13 | D | - Quantizing single_transformer_blocks.17.attn.to_q.weight +25-08-31 12:39:13 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:13 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:13 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:14 | D | - Quantizing single_transformer_blocks.17.attn.to_k.weight +25-08-31 12:39:14 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:14 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:14 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:14 | D | - Quantizing single_transformer_blocks.17.attn.to_v.weight +25-08-31 12:39:14 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:14 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:14 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:14 | D | - Quantizing single_transformer_blocks.17.proj_out.linears.0.weight +25-08-31 12:39:14 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:14 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:14 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:14 | D | - Quantizing single_transformer_blocks.17.proj_mlp.weight +25-08-31 12:39:14 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:14 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:14 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:14 | D | - Quantizing single_transformer_blocks.17.proj_out.linears.1.weight +25-08-31 12:39:14 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:14 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:14 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:15 | D | - Quantizing weights: block single_transformer_blocks.18 +25-08-31 12:39:15 | D | - Quantizing single_transformer_blocks.18.norm.linear.weight +25-08-31 12:39:15 | D | + quant_dtype: sint4 +25-08-31 12:39:15 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:15 | D | + scale_dtype: (None,) +25-08-31 12:39:15 | D | - Quantizing single_transformer_blocks.18.attn.to_q.weight +25-08-31 12:39:15 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:15 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:15 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:15 | D | - Quantizing single_transformer_blocks.18.attn.to_k.weight +25-08-31 12:39:15 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:15 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:15 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:15 | D | - Quantizing single_transformer_blocks.18.attn.to_v.weight +25-08-31 12:39:15 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:15 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:15 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:15 | D | - Quantizing single_transformer_blocks.18.proj_out.linears.0.weight +25-08-31 12:39:15 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:15 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:15 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:16 | D | - Quantizing single_transformer_blocks.18.proj_mlp.weight +25-08-31 12:39:16 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:16 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:16 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:16 | D | - Quantizing single_transformer_blocks.18.proj_out.linears.1.weight +25-08-31 12:39:16 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:16 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:16 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:16 | D | - Quantizing weights: block single_transformer_blocks.19 +25-08-31 12:39:16 | D | - Quantizing single_transformer_blocks.19.norm.linear.weight +25-08-31 12:39:16 | D | + quant_dtype: sint4 +25-08-31 12:39:16 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:16 | D | + scale_dtype: (None,) +25-08-31 12:39:16 | D | - Quantizing single_transformer_blocks.19.attn.to_q.weight +25-08-31 12:39:16 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:16 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:16 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:16 | D | - Quantizing single_transformer_blocks.19.attn.to_k.weight +25-08-31 12:39:16 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:16 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:16 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:17 | D | - Quantizing single_transformer_blocks.19.attn.to_v.weight +25-08-31 12:39:17 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:17 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:17 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:17 | D | - Quantizing single_transformer_blocks.19.proj_out.linears.0.weight +25-08-31 12:39:17 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:17 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:17 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:17 | D | - Quantizing single_transformer_blocks.19.proj_mlp.weight +25-08-31 12:39:17 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:17 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:17 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:17 | D | - Quantizing single_transformer_blocks.19.proj_out.linears.1.weight +25-08-31 12:39:17 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:17 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:17 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:17 | D | - Quantizing weights: block single_transformer_blocks.20 +25-08-31 12:39:17 | D | - Quantizing single_transformer_blocks.20.norm.linear.weight +25-08-31 12:39:17 | D | + quant_dtype: sint4 +25-08-31 12:39:17 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:17 | D | + scale_dtype: (None,) +25-08-31 12:39:18 | D | - Quantizing single_transformer_blocks.20.attn.to_q.weight +25-08-31 12:39:18 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:18 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:18 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:18 | D | - Quantizing single_transformer_blocks.20.attn.to_k.weight +25-08-31 12:39:18 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:18 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:18 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:18 | D | - Quantizing single_transformer_blocks.20.attn.to_v.weight +25-08-31 12:39:18 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:18 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:18 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:18 | D | - Quantizing single_transformer_blocks.20.proj_out.linears.0.weight +25-08-31 12:39:18 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:18 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:18 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:18 | D | - Quantizing single_transformer_blocks.20.proj_mlp.weight +25-08-31 12:39:18 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:18 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:18 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:18 | D | - Quantizing single_transformer_blocks.20.proj_out.linears.1.weight +25-08-31 12:39:18 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:18 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:18 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:19 | D | - Quantizing weights: block single_transformer_blocks.21 +25-08-31 12:39:19 | D | - Quantizing single_transformer_blocks.21.norm.linear.weight +25-08-31 12:39:19 | D | + quant_dtype: sint4 +25-08-31 12:39:19 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:19 | D | + scale_dtype: (None,) +25-08-31 12:39:19 | D | - Quantizing single_transformer_blocks.21.attn.to_q.weight +25-08-31 12:39:19 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:19 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:19 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:19 | D | - Quantizing single_transformer_blocks.21.attn.to_k.weight +25-08-31 12:39:19 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:19 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:19 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:19 | D | - Quantizing single_transformer_blocks.21.attn.to_v.weight +25-08-31 12:39:19 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:19 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:19 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:19 | D | - Quantizing single_transformer_blocks.21.proj_out.linears.0.weight +25-08-31 12:39:19 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:19 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:19 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:20 | D | - Quantizing single_transformer_blocks.21.proj_mlp.weight +25-08-31 12:39:20 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:20 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:20 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:20 | D | - Quantizing single_transformer_blocks.21.proj_out.linears.1.weight +25-08-31 12:39:20 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:20 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:20 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:20 | D | - Quantizing weights: block single_transformer_blocks.22 +25-08-31 12:39:20 | D | - Quantizing single_transformer_blocks.22.norm.linear.weight +25-08-31 12:39:20 | D | + quant_dtype: sint4 +25-08-31 12:39:20 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:20 | D | + scale_dtype: (None,) +25-08-31 12:39:20 | D | - Quantizing single_transformer_blocks.22.attn.to_q.weight +25-08-31 12:39:20 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:20 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:20 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:20 | D | - Quantizing single_transformer_blocks.22.attn.to_k.weight +25-08-31 12:39:20 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:20 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:20 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:21 | D | - Quantizing single_transformer_blocks.22.attn.to_v.weight +25-08-31 12:39:21 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:21 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:21 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:21 | D | - Quantizing single_transformer_blocks.22.proj_out.linears.0.weight +25-08-31 12:39:21 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:21 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:21 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:21 | D | - Quantizing single_transformer_blocks.22.proj_mlp.weight +25-08-31 12:39:21 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:21 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:21 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:21 | D | - Quantizing single_transformer_blocks.22.proj_out.linears.1.weight +25-08-31 12:39:21 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:21 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:21 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:21 | D | - Quantizing weights: block single_transformer_blocks.23 +25-08-31 12:39:21 | D | - Quantizing single_transformer_blocks.23.norm.linear.weight +25-08-31 12:39:21 | D | + quant_dtype: sint4 +25-08-31 12:39:21 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:21 | D | + scale_dtype: (None,) +25-08-31 12:39:22 | D | - Quantizing single_transformer_blocks.23.attn.to_q.weight +25-08-31 12:39:22 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:22 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:22 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:22 | D | - Quantizing single_transformer_blocks.23.attn.to_k.weight +25-08-31 12:39:22 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:22 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:22 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:22 | D | - Quantizing single_transformer_blocks.23.attn.to_v.weight +25-08-31 12:39:22 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:22 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:22 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:22 | D | - Quantizing single_transformer_blocks.23.proj_out.linears.0.weight +25-08-31 12:39:22 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:22 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:22 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:22 | D | - Quantizing single_transformer_blocks.23.proj_mlp.weight +25-08-31 12:39:22 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:22 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:22 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:23 | D | - Quantizing single_transformer_blocks.23.proj_out.linears.1.weight +25-08-31 12:39:23 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:23 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:23 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:23 | D | - Quantizing weights: block single_transformer_blocks.24 +25-08-31 12:39:23 | D | - Quantizing single_transformer_blocks.24.norm.linear.weight +25-08-31 12:39:23 | D | + quant_dtype: sint4 +25-08-31 12:39:23 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:23 | D | + scale_dtype: (None,) +25-08-31 12:39:23 | D | - Quantizing single_transformer_blocks.24.attn.to_q.weight +25-08-31 12:39:23 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:23 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:23 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:23 | D | - Quantizing single_transformer_blocks.24.attn.to_k.weight +25-08-31 12:39:23 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:23 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:23 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:23 | D | - Quantizing single_transformer_blocks.24.attn.to_v.weight +25-08-31 12:39:23 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:23 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:23 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:24 | D | - Quantizing single_transformer_blocks.24.proj_out.linears.0.weight +25-08-31 12:39:24 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:24 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:24 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:24 | D | - Quantizing single_transformer_blocks.24.proj_mlp.weight +25-08-31 12:39:24 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:24 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:24 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:24 | D | - Quantizing single_transformer_blocks.24.proj_out.linears.1.weight +25-08-31 12:39:24 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:24 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:24 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:24 | D | - Quantizing weights: block single_transformer_blocks.25 +25-08-31 12:39:24 | D | - Quantizing single_transformer_blocks.25.norm.linear.weight +25-08-31 12:39:24 | D | + quant_dtype: sint4 +25-08-31 12:39:24 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:24 | D | + scale_dtype: (None,) +25-08-31 12:39:24 | D | - Quantizing single_transformer_blocks.25.attn.to_q.weight +25-08-31 12:39:24 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:24 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:24 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:24 | D | - Quantizing single_transformer_blocks.25.attn.to_k.weight +25-08-31 12:39:24 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:24 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:24 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:25 | D | - Quantizing single_transformer_blocks.25.attn.to_v.weight +25-08-31 12:39:25 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:25 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:25 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:25 | D | - Quantizing single_transformer_blocks.25.proj_out.linears.0.weight +25-08-31 12:39:25 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:25 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:25 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:25 | D | - Quantizing single_transformer_blocks.25.proj_mlp.weight +25-08-31 12:39:25 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:25 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:25 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:25 | D | - Quantizing single_transformer_blocks.25.proj_out.linears.1.weight +25-08-31 12:39:25 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:25 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:25 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:25 | D | - Quantizing weights: block single_transformer_blocks.26 +25-08-31 12:39:25 | D | - Quantizing single_transformer_blocks.26.norm.linear.weight +25-08-31 12:39:25 | D | + quant_dtype: sint4 +25-08-31 12:39:25 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:25 | D | + scale_dtype: (None,) +25-08-31 12:39:26 | D | - Quantizing single_transformer_blocks.26.attn.to_q.weight +25-08-31 12:39:26 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:26 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:26 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:26 | D | - Quantizing single_transformer_blocks.26.attn.to_k.weight +25-08-31 12:39:26 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:26 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:26 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:26 | D | - Quantizing single_transformer_blocks.26.attn.to_v.weight +25-08-31 12:39:26 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:26 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:26 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:26 | D | - Quantizing single_transformer_blocks.26.proj_out.linears.0.weight +25-08-31 12:39:26 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:26 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:26 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:26 | D | - Quantizing single_transformer_blocks.26.proj_mlp.weight +25-08-31 12:39:26 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:26 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:26 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:27 | D | - Quantizing single_transformer_blocks.26.proj_out.linears.1.weight +25-08-31 12:39:27 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:27 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:27 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:27 | D | - Quantizing weights: block single_transformer_blocks.27 +25-08-31 12:39:27 | D | - Quantizing single_transformer_blocks.27.norm.linear.weight +25-08-31 12:39:27 | D | + quant_dtype: sint4 +25-08-31 12:39:27 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:27 | D | + scale_dtype: (None,) +25-08-31 12:39:27 | D | - Quantizing single_transformer_blocks.27.attn.to_q.weight +25-08-31 12:39:27 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:27 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:27 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:27 | D | - Quantizing single_transformer_blocks.27.attn.to_k.weight +25-08-31 12:39:27 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:27 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:27 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:27 | D | - Quantizing single_transformer_blocks.27.attn.to_v.weight +25-08-31 12:39:27 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:27 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:27 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:28 | D | - Quantizing single_transformer_blocks.27.proj_out.linears.0.weight +25-08-31 12:39:28 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:28 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:28 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:28 | D | - Quantizing single_transformer_blocks.27.proj_mlp.weight +25-08-31 12:39:28 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:28 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:28 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:28 | D | - Quantizing single_transformer_blocks.27.proj_out.linears.1.weight +25-08-31 12:39:28 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:28 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:28 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:28 | D | - Quantizing weights: block single_transformer_blocks.28 +25-08-31 12:39:28 | D | - Quantizing single_transformer_blocks.28.norm.linear.weight +25-08-31 12:39:28 | D | + quant_dtype: sint4 +25-08-31 12:39:28 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:28 | D | + scale_dtype: (None,) +25-08-31 12:39:28 | D | - Quantizing single_transformer_blocks.28.attn.to_q.weight +25-08-31 12:39:28 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:28 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:28 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:29 | D | - Quantizing single_transformer_blocks.28.attn.to_k.weight +25-08-31 12:39:29 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:29 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:29 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:29 | D | - Quantizing single_transformer_blocks.28.attn.to_v.weight +25-08-31 12:39:29 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:29 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:29 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:29 | D | - Quantizing single_transformer_blocks.28.proj_out.linears.0.weight +25-08-31 12:39:29 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:29 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:29 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:29 | D | - Quantizing single_transformer_blocks.28.proj_mlp.weight +25-08-31 12:39:29 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:29 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:29 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:29 | D | - Quantizing single_transformer_blocks.28.proj_out.linears.1.weight +25-08-31 12:39:29 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:29 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:29 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:30 | D | - Quantizing weights: block single_transformer_blocks.29 +25-08-31 12:39:30 | D | - Quantizing single_transformer_blocks.29.norm.linear.weight +25-08-31 12:39:30 | D | + quant_dtype: sint4 +25-08-31 12:39:30 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:30 | D | + scale_dtype: (None,) +25-08-31 12:39:30 | D | - Quantizing single_transformer_blocks.29.attn.to_q.weight +25-08-31 12:39:30 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:30 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:30 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:30 | D | - Quantizing single_transformer_blocks.29.attn.to_k.weight +25-08-31 12:39:30 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:30 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:30 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:30 | D | - Quantizing single_transformer_blocks.29.attn.to_v.weight +25-08-31 12:39:30 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:30 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:30 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:30 | D | - Quantizing single_transformer_blocks.29.proj_out.linears.0.weight +25-08-31 12:39:30 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:30 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:30 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:30 | D | - Quantizing single_transformer_blocks.29.proj_mlp.weight +25-08-31 12:39:30 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:30 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:30 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:31 | D | - Quantizing single_transformer_blocks.29.proj_out.linears.1.weight +25-08-31 12:39:31 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:31 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:31 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:31 | D | - Quantizing weights: block single_transformer_blocks.30 +25-08-31 12:39:31 | D | - Quantizing single_transformer_blocks.30.norm.linear.weight +25-08-31 12:39:31 | D | + quant_dtype: sint4 +25-08-31 12:39:31 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:31 | D | + scale_dtype: (None,) +25-08-31 12:39:31 | D | - Quantizing single_transformer_blocks.30.attn.to_q.weight +25-08-31 12:39:31 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:31 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:31 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:31 | D | - Quantizing single_transformer_blocks.30.attn.to_k.weight +25-08-31 12:39:31 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:31 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:31 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:31 | D | - Quantizing single_transformer_blocks.30.attn.to_v.weight +25-08-31 12:39:31 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:31 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:31 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:32 | D | - Quantizing single_transformer_blocks.30.proj_out.linears.0.weight +25-08-31 12:39:32 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:32 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:32 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:32 | D | - Quantizing single_transformer_blocks.30.proj_mlp.weight +25-08-31 12:39:32 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:32 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:32 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:32 | D | - Quantizing single_transformer_blocks.30.proj_out.linears.1.weight +25-08-31 12:39:32 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:32 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:32 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:32 | D | - Quantizing weights: block single_transformer_blocks.31 +25-08-31 12:39:32 | D | - Quantizing single_transformer_blocks.31.norm.linear.weight +25-08-31 12:39:32 | D | + quant_dtype: sint4 +25-08-31 12:39:32 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:32 | D | + scale_dtype: (None,) +25-08-31 12:39:32 | D | - Quantizing single_transformer_blocks.31.attn.to_q.weight +25-08-31 12:39:32 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:32 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:32 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:33 | D | - Quantizing single_transformer_blocks.31.attn.to_k.weight +25-08-31 12:39:33 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:33 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:33 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:33 | D | - Quantizing single_transformer_blocks.31.attn.to_v.weight +25-08-31 12:39:33 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:33 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:33 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:33 | D | - Quantizing single_transformer_blocks.31.proj_out.linears.0.weight +25-08-31 12:39:33 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:33 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:33 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:33 | D | - Quantizing single_transformer_blocks.31.proj_mlp.weight +25-08-31 12:39:33 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:33 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:33 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:33 | D | - Quantizing single_transformer_blocks.31.proj_out.linears.1.weight +25-08-31 12:39:33 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:33 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:33 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:34 | D | - Quantizing weights: block single_transformer_blocks.32 +25-08-31 12:39:34 | D | - Quantizing single_transformer_blocks.32.norm.linear.weight +25-08-31 12:39:34 | D | + quant_dtype: sint4 +25-08-31 12:39:34 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:34 | D | + scale_dtype: (None,) +25-08-31 12:39:34 | D | - Quantizing single_transformer_blocks.32.attn.to_q.weight +25-08-31 12:39:34 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:34 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:34 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:34 | D | - Quantizing single_transformer_blocks.32.attn.to_k.weight +25-08-31 12:39:34 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:34 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:34 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:34 | D | - Quantizing single_transformer_blocks.32.attn.to_v.weight +25-08-31 12:39:34 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:34 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:34 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:34 | D | - Quantizing single_transformer_blocks.32.proj_out.linears.0.weight +25-08-31 12:39:34 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:34 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:34 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:35 | D | - Quantizing single_transformer_blocks.32.proj_mlp.weight +25-08-31 12:39:35 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:35 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:35 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:35 | D | - Quantizing single_transformer_blocks.32.proj_out.linears.1.weight +25-08-31 12:39:35 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:35 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:35 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:35 | D | - Quantizing weights: block single_transformer_blocks.33 +25-08-31 12:39:35 | D | - Quantizing single_transformer_blocks.33.norm.linear.weight +25-08-31 12:39:35 | D | + quant_dtype: sint4 +25-08-31 12:39:35 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:35 | D | + scale_dtype: (None,) +25-08-31 12:39:35 | D | - Quantizing single_transformer_blocks.33.attn.to_q.weight +25-08-31 12:39:35 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:35 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:35 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:35 | D | - Quantizing single_transformer_blocks.33.attn.to_k.weight +25-08-31 12:39:35 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:35 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:35 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:36 | D | - Quantizing single_transformer_blocks.33.attn.to_v.weight +25-08-31 12:39:36 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:36 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:36 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:36 | D | - Quantizing single_transformer_blocks.33.proj_out.linears.0.weight +25-08-31 12:39:36 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:36 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:36 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:36 | D | - Quantizing single_transformer_blocks.33.proj_mlp.weight +25-08-31 12:39:36 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:36 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:36 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:36 | D | - Quantizing single_transformer_blocks.33.proj_out.linears.1.weight +25-08-31 12:39:36 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:36 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:36 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:36 | D | - Quantizing weights: block single_transformer_blocks.34 +25-08-31 12:39:36 | D | - Quantizing single_transformer_blocks.34.norm.linear.weight +25-08-31 12:39:36 | D | + quant_dtype: sint4 +25-08-31 12:39:36 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:36 | D | + scale_dtype: (None,) +25-08-31 12:39:36 | D | - Quantizing single_transformer_blocks.34.attn.to_q.weight +25-08-31 12:39:36 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:36 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:36 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:37 | D | - Quantizing single_transformer_blocks.34.attn.to_k.weight +25-08-31 12:39:37 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:37 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:37 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:37 | D | - Quantizing single_transformer_blocks.34.attn.to_v.weight +25-08-31 12:39:37 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:37 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:37 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:37 | D | - Quantizing single_transformer_blocks.34.proj_out.linears.0.weight +25-08-31 12:39:37 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:37 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:37 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:37 | D | - Quantizing single_transformer_blocks.34.proj_mlp.weight +25-08-31 12:39:37 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:37 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:37 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:37 | D | - Quantizing single_transformer_blocks.34.proj_out.linears.1.weight +25-08-31 12:39:37 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:37 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:37 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:38 | D | - Quantizing weights: block single_transformer_blocks.35 +25-08-31 12:39:38 | D | - Quantizing single_transformer_blocks.35.norm.linear.weight +25-08-31 12:39:38 | D | + quant_dtype: sint4 +25-08-31 12:39:38 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:38 | D | + scale_dtype: (None,) +25-08-31 12:39:38 | D | - Quantizing single_transformer_blocks.35.attn.to_q.weight +25-08-31 12:39:38 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:38 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:38 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:38 | D | - Quantizing single_transformer_blocks.35.attn.to_k.weight +25-08-31 12:39:38 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:38 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:38 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:38 | D | - Quantizing single_transformer_blocks.35.attn.to_v.weight +25-08-31 12:39:38 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:38 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:38 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:38 | D | - Quantizing single_transformer_blocks.35.proj_out.linears.0.weight +25-08-31 12:39:38 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:38 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:38 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:39 | D | - Quantizing single_transformer_blocks.35.proj_mlp.weight +25-08-31 12:39:39 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:39 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:39 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:39 | D | - Quantizing single_transformer_blocks.35.proj_out.linears.1.weight +25-08-31 12:39:39 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:39 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:39 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:39 | D | - Quantizing weights: block single_transformer_blocks.36 +25-08-31 12:39:39 | D | - Quantizing single_transformer_blocks.36.norm.linear.weight +25-08-31 12:39:39 | D | + quant_dtype: sint4 +25-08-31 12:39:39 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:39 | D | + scale_dtype: (None,) +25-08-31 12:39:39 | D | - Quantizing single_transformer_blocks.36.attn.to_q.weight +25-08-31 12:39:39 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:39 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:39 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:39 | D | - Quantizing single_transformer_blocks.36.attn.to_k.weight +25-08-31 12:39:39 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:39 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:39 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:40 | D | - Quantizing single_transformer_blocks.36.attn.to_v.weight +25-08-31 12:39:40 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:40 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:40 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:40 | D | - Quantizing single_transformer_blocks.36.proj_out.linears.0.weight +25-08-31 12:39:40 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:40 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:40 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:40 | D | - Quantizing single_transformer_blocks.36.proj_mlp.weight +25-08-31 12:39:40 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:40 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:40 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:40 | D | - Quantizing single_transformer_blocks.36.proj_out.linears.1.weight +25-08-31 12:39:40 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:40 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:40 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:40 | D | - Quantizing weights: block single_transformer_blocks.37 +25-08-31 12:39:40 | D | - Quantizing single_transformer_blocks.37.norm.linear.weight +25-08-31 12:39:40 | D | + quant_dtype: sint4 +25-08-31 12:39:40 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-31 12:39:40 | D | + scale_dtype: (None,) +25-08-31 12:39:40 | D | - Quantizing single_transformer_blocks.37.attn.to_q.weight +25-08-31 12:39:40 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:40 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:40 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:41 | D | - Quantizing single_transformer_blocks.37.attn.to_k.weight +25-08-31 12:39:41 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:41 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:41 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:41 | D | - Quantizing single_transformer_blocks.37.attn.to_v.weight +25-08-31 12:39:41 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:41 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:41 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:41 | D | - Quantizing single_transformer_blocks.37.proj_out.linears.0.weight +25-08-31 12:39:41 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:41 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:41 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:41 | D | - Quantizing single_transformer_blocks.37.proj_mlp.weight +25-08-31 12:39:41 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:41 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:41 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:41 | D | - Quantizing single_transformer_blocks.37.proj_out.linears.1.weight +25-08-31 12:39:41 | D | + quant_dtype: sfp4_e2m1_all +25-08-31 12:39:41 | D | + group_shape: ((-1, -1, -1), (1, 16, 1, 1, 1)) +25-08-31 12:39:41 | D | + scale_dtype: (None, sfp8_e4m3_nan) +25-08-31 12:39:42 | I | - Saving branch settings to runs/diffusion/cache/quant/qdiff.128/branch/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/w.skip.[e+rs+rtp+s+tpi+tpo]-x.skip.[e+rs+rtp+s+tan+tn+tpi+tpo]-y.skip.[]-w.include.[tan+tn]/lowrank.r32/smooth.proj.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.proj.[a.AbsMax.b.AbsMax]/smooth.proj.g20.bn2.lr.nf/smooth.proj.skip.[rc+tan+tn]/lowrank.OutputsError.Manual.Layer.d2.en1.sn1/lowrank.i100.r32.earlystop/lowrank.skip.[rc+tan+tn]/flux.1-dev-ghibli.pt +25-08-31 12:39:42 | I | - Saving weight settings to runs/diffusion/flux.1/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000.RUNNING/run-250831.030550.RUNNING/model/wgts.pt +25-08-31 12:39:42 | I | - Linking branch settings to runs/diffusion/flux.1/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000.RUNNING/run-250831.030550.RUNNING/model/branch.pt +25-08-31 12:39:42 | I | - Saving model to runs/diffusion/flux.1/flux.1-dev-ghibli/w.4-x.4-y.16-w.4/w.sfp4_e2m1_all-x.sfp4_e2m1_all-y.bf16-w.sint4/w.v16.sfp8_e4m3_nan.tsnr.bf16-x.v16.sfp8_e4m3_nan-y.tnsr.bf16-w.v64.bf16/smooth.proj-w.static.lowrank/skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-extra.[tan+tn]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000.RUNNING/run-250831.030550.RUNNING/model +25-08-31 12:40:11 | I | * Quantizing activations +25-08-31 12:40:11 | I | - No need to generate/load activation quantizer settings +25-08-31 12:40:11 | D | - Quantizing layer transformer_blocks.0 +25-08-31 12:40:11 | D | - Calibrating transformer_blocks.0.attn.to_q.input, transformer_blocks.0.attn.to_k.input, transformer_blocks.0.attn.to_v.input +25-08-31 12:40:11 | D | - Calibrating transformer_blocks.0.attn.add_q_proj.input, transformer_blocks.0.attn.add_k_proj.input, transformer_blocks.0.attn.add_v_proj.input +25-08-31 12:40:11 | D | - Calibrating transformer_blocks.0.attn.to_out.0.input +25-08-31 12:40:11 | D | - Calibrating transformer_blocks.0.attn.to_add_out.input +25-08-31 12:40:12 | D | - Calibrating transformer_blocks.0.ff.net.0.proj.input +25-08-31 12:40:12 | D | - Calibrating transformer_blocks.0.ff.net.2.input +25-08-31 12:40:12 | D | - Calibrating transformer_blocks.0.ff_context.net.0.proj.input +25-08-31 12:40:12 | D | - Calibrating transformer_blocks.0.ff_context.net.2.input +25-08-31 12:40:12 | D | - Quantizing layer transformer_blocks.1 +25-08-31 12:40:12 | D | - Calibrating transformer_blocks.1.attn.to_q.input, transformer_blocks.1.attn.to_k.input, transformer_blocks.1.attn.to_v.input +25-08-31 12:40:12 | D | - Calibrating transformer_blocks.1.attn.add_q_proj.input, transformer_blocks.1.attn.add_k_proj.input, transformer_blocks.1.attn.add_v_proj.input +25-08-31 12:40:13 | D | - Calibrating transformer_blocks.1.attn.to_out.0.input +25-08-31 12:40:13 | D | - Calibrating transformer_blocks.1.attn.to_add_out.input +25-08-31 12:40:13 | D | - Calibrating transformer_blocks.1.ff.net.0.proj.input +25-08-31 12:40:13 | D | - Calibrating transformer_blocks.1.ff.net.2.input +25-08-31 12:40:13 | D | - Calibrating transformer_blocks.1.ff_context.net.0.proj.input +25-08-31 12:40:14 | D | - Calibrating transformer_blocks.1.ff_context.net.2.input +25-08-31 12:40:14 | D | - Quantizing layer transformer_blocks.2 +25-08-31 12:40:14 | D | - Calibrating transformer_blocks.2.attn.to_q.input, transformer_blocks.2.attn.to_k.input, transformer_blocks.2.attn.to_v.input +25-08-31 12:40:14 | D | - Calibrating transformer_blocks.2.attn.add_q_proj.input, transformer_blocks.2.attn.add_k_proj.input, transformer_blocks.2.attn.add_v_proj.input +25-08-31 12:40:14 | D | - Calibrating transformer_blocks.2.attn.to_out.0.input +25-08-31 12:40:14 | D | - Calibrating transformer_blocks.2.attn.to_add_out.input +25-08-31 12:40:15 | D | - Calibrating transformer_blocks.2.ff.net.0.proj.input +25-08-31 12:40:15 | D | - Calibrating transformer_blocks.2.ff.net.2.input +25-08-31 12:40:15 | D | - Calibrating transformer_blocks.2.ff_context.net.0.proj.input +25-08-31 12:40:15 | D | - Calibrating transformer_blocks.2.ff_context.net.2.input +25-08-31 12:40:15 | D | - Quantizing layer transformer_blocks.3 +25-08-31 12:40:15 | D | - Calibrating transformer_blocks.3.attn.to_q.input, transformer_blocks.3.attn.to_k.input, transformer_blocks.3.attn.to_v.input +25-08-31 12:40:15 | D | - Calibrating transformer_blocks.3.attn.add_q_proj.input, transformer_blocks.3.attn.add_k_proj.input, transformer_blocks.3.attn.add_v_proj.input +25-08-31 12:40:16 | D | - Calibrating transformer_blocks.3.attn.to_out.0.input +25-08-31 12:40:16 | D | - Calibrating transformer_blocks.3.attn.to_add_out.input +25-08-31 12:40:16 | D | - Calibrating transformer_blocks.3.ff.net.0.proj.input +25-08-31 12:40:16 | D | - Calibrating transformer_blocks.3.ff.net.2.input +25-08-31 12:40:16 | D | - Calibrating transformer_blocks.3.ff_context.net.0.proj.input +25-08-31 12:40:17 | D | - Calibrating transformer_blocks.3.ff_context.net.2.input +25-08-31 12:40:17 | D | - Quantizing layer transformer_blocks.4 +25-08-31 12:40:17 | D | - Calibrating transformer_blocks.4.attn.to_q.input, transformer_blocks.4.attn.to_k.input, transformer_blocks.4.attn.to_v.input +25-08-31 12:40:17 | D | - Calibrating transformer_blocks.4.attn.add_q_proj.input, transformer_blocks.4.attn.add_k_proj.input, transformer_blocks.4.attn.add_v_proj.input +25-08-31 12:40:17 | D | - Calibrating transformer_blocks.4.attn.to_out.0.input +25-08-31 12:40:17 | D | - Calibrating transformer_blocks.4.attn.to_add_out.input +25-08-31 12:40:17 | D | - Calibrating transformer_blocks.4.ff.net.0.proj.input +25-08-31 12:40:18 | D | - Calibrating transformer_blocks.4.ff.net.2.input +25-08-31 12:40:18 | D | - Calibrating transformer_blocks.4.ff_context.net.0.proj.input +25-08-31 12:40:18 | D | - Calibrating transformer_blocks.4.ff_context.net.2.input +25-08-31 12:40:18 | D | - Quantizing layer transformer_blocks.5 +25-08-31 12:40:18 | D | - Calibrating transformer_blocks.5.attn.to_q.input, transformer_blocks.5.attn.to_k.input, transformer_blocks.5.attn.to_v.input +25-08-31 12:40:18 | D | - Calibrating transformer_blocks.5.attn.add_q_proj.input, transformer_blocks.5.attn.add_k_proj.input, transformer_blocks.5.attn.add_v_proj.input +25-08-31 12:40:19 | D | - Calibrating transformer_blocks.5.attn.to_out.0.input +25-08-31 12:40:19 | D | - Calibrating transformer_blocks.5.attn.to_add_out.input +25-08-31 12:40:19 | D | - Calibrating transformer_blocks.5.ff.net.0.proj.input +25-08-31 12:40:19 | D | - Calibrating transformer_blocks.5.ff.net.2.input +25-08-31 12:40:19 | D | - Calibrating transformer_blocks.5.ff_context.net.0.proj.input +25-08-31 12:40:20 | D | - Calibrating transformer_blocks.5.ff_context.net.2.input +25-08-31 12:40:20 | D | - Quantizing layer transformer_blocks.6 +25-08-31 12:40:20 | D | - Calibrating transformer_blocks.6.attn.to_q.input, transformer_blocks.6.attn.to_k.input, transformer_blocks.6.attn.to_v.input +25-08-31 12:40:20 | D | - Calibrating transformer_blocks.6.attn.add_q_proj.input, transformer_blocks.6.attn.add_k_proj.input, transformer_blocks.6.attn.add_v_proj.input +25-08-31 12:40:20 | D | - Calibrating transformer_blocks.6.attn.to_out.0.input +25-08-31 12:40:20 | D | - Calibrating transformer_blocks.6.attn.to_add_out.input +25-08-31 12:40:20 | D | - Calibrating transformer_blocks.6.ff.net.0.proj.input +25-08-31 12:40:21 | D | - Calibrating transformer_blocks.6.ff.net.2.input +25-08-31 12:40:21 | D | - Calibrating transformer_blocks.6.ff_context.net.0.proj.input +25-08-31 12:40:21 | D | - Calibrating transformer_blocks.6.ff_context.net.2.input +25-08-31 12:40:21 | D | - Quantizing layer transformer_blocks.7 +25-08-31 12:40:21 | D | - Calibrating transformer_blocks.7.attn.to_q.input, transformer_blocks.7.attn.to_k.input, transformer_blocks.7.attn.to_v.input +25-08-31 12:40:21 | D | - Calibrating transformer_blocks.7.attn.add_q_proj.input, transformer_blocks.7.attn.add_k_proj.input, transformer_blocks.7.attn.add_v_proj.input +25-08-31 12:40:22 | D | - Calibrating transformer_blocks.7.attn.to_out.0.input +25-08-31 12:40:22 | D | - Calibrating transformer_blocks.7.attn.to_add_out.input +25-08-31 12:40:22 | D | - Calibrating transformer_blocks.7.ff.net.0.proj.input +25-08-31 12:40:22 | D | - Calibrating transformer_blocks.7.ff.net.2.input +25-08-31 12:40:22 | D | - Calibrating transformer_blocks.7.ff_context.net.0.proj.input +25-08-31 12:40:22 | D | - Calibrating transformer_blocks.7.ff_context.net.2.input +25-08-31 12:40:23 | D | - Quantizing layer transformer_blocks.8 +25-08-31 12:40:23 | D | - Calibrating transformer_blocks.8.attn.to_q.input, transformer_blocks.8.attn.to_k.input, transformer_blocks.8.attn.to_v.input +25-08-31 12:40:23 | D | - Calibrating transformer_blocks.8.attn.add_q_proj.input, transformer_blocks.8.attn.add_k_proj.input, transformer_blocks.8.attn.add_v_proj.input +25-08-31 12:40:23 | D | - Calibrating transformer_blocks.8.attn.to_out.0.input +25-08-31 12:40:23 | D | - Calibrating transformer_blocks.8.attn.to_add_out.input +25-08-31 12:40:23 | D | - Calibrating transformer_blocks.8.ff.net.0.proj.input +25-08-31 12:40:24 | D | - Calibrating transformer_blocks.8.ff.net.2.input +25-08-31 12:40:24 | D | - Calibrating transformer_blocks.8.ff_context.net.0.proj.input +25-08-31 12:40:24 | D | - Calibrating transformer_blocks.8.ff_context.net.2.input +25-08-31 12:40:24 | D | - Quantizing layer transformer_blocks.9 +25-08-31 12:40:24 | D | - Calibrating transformer_blocks.9.attn.to_q.input, transformer_blocks.9.attn.to_k.input, transformer_blocks.9.attn.to_v.input +25-08-31 12:40:24 | D | - Calibrating transformer_blocks.9.attn.add_q_proj.input, transformer_blocks.9.attn.add_k_proj.input, transformer_blocks.9.attn.add_v_proj.input +25-08-31 12:40:25 | D | - Calibrating transformer_blocks.9.attn.to_out.0.input +25-08-31 12:40:25 | D | - Calibrating transformer_blocks.9.attn.to_add_out.input +25-08-31 12:40:25 | D | - Calibrating transformer_blocks.9.ff.net.0.proj.input +25-08-31 12:40:25 | D | - Calibrating transformer_blocks.9.ff.net.2.input +25-08-31 12:40:25 | D | - Calibrating transformer_blocks.9.ff_context.net.0.proj.input +25-08-31 12:40:25 | D | - Calibrating transformer_blocks.9.ff_context.net.2.input +25-08-31 12:40:26 | D | - Quantizing layer transformer_blocks.10 +25-08-31 12:40:26 | D | - Calibrating transformer_blocks.10.attn.to_q.input, transformer_blocks.10.attn.to_k.input, transformer_blocks.10.attn.to_v.input +25-08-31 12:40:26 | D | - Calibrating transformer_blocks.10.attn.add_q_proj.input, transformer_blocks.10.attn.add_k_proj.input, transformer_blocks.10.attn.add_v_proj.input +25-08-31 12:40:26 | D | - Calibrating transformer_blocks.10.attn.to_out.0.input +25-08-31 12:40:26 | D | - Calibrating transformer_blocks.10.attn.to_add_out.input +25-08-31 12:40:26 | D | - Calibrating transformer_blocks.10.ff.net.0.proj.input +25-08-31 12:40:27 | D | - Calibrating transformer_blocks.10.ff.net.2.input +25-08-31 12:40:27 | D | - Calibrating transformer_blocks.10.ff_context.net.0.proj.input +25-08-31 12:40:27 | D | - Calibrating transformer_blocks.10.ff_context.net.2.input +25-08-31 12:40:27 | D | - Quantizing layer transformer_blocks.11 +25-08-31 12:40:27 | D | - Calibrating transformer_blocks.11.attn.to_q.input, transformer_blocks.11.attn.to_k.input, transformer_blocks.11.attn.to_v.input +25-08-31 12:40:27 | D | - Calibrating transformer_blocks.11.attn.add_q_proj.input, transformer_blocks.11.attn.add_k_proj.input, transformer_blocks.11.attn.add_v_proj.input +25-08-31 12:40:28 | D | - Calibrating transformer_blocks.11.attn.to_out.0.input +25-08-31 12:40:28 | D | - Calibrating transformer_blocks.11.attn.to_add_out.input +25-08-31 12:40:28 | D | - Calibrating transformer_blocks.11.ff.net.0.proj.input +25-08-31 12:40:28 | D | - Calibrating transformer_blocks.11.ff.net.2.input +25-08-31 12:40:28 | D | - Calibrating transformer_blocks.11.ff_context.net.0.proj.input +25-08-31 12:40:28 | D | - Calibrating transformer_blocks.11.ff_context.net.2.input +25-08-31 12:40:29 | D | - Quantizing layer transformer_blocks.12 +25-08-31 12:40:29 | D | - Calibrating transformer_blocks.12.attn.to_q.input, transformer_blocks.12.attn.to_k.input, transformer_blocks.12.attn.to_v.input +25-08-31 12:40:29 | D | - Calibrating transformer_blocks.12.attn.add_q_proj.input, transformer_blocks.12.attn.add_k_proj.input, transformer_blocks.12.attn.add_v_proj.input +25-08-31 12:40:29 | D | - Calibrating transformer_blocks.12.attn.to_out.0.input +25-08-31 12:40:29 | D | - Calibrating transformer_blocks.12.attn.to_add_out.input +25-08-31 12:40:29 | D | - Calibrating transformer_blocks.12.ff.net.0.proj.input +25-08-31 12:40:30 | D | - Calibrating transformer_blocks.12.ff.net.2.input +25-08-31 12:40:30 | D | - Calibrating transformer_blocks.12.ff_context.net.0.proj.input +25-08-31 12:40:30 | D | - Calibrating transformer_blocks.12.ff_context.net.2.input +25-08-31 12:40:30 | D | - Quantizing layer transformer_blocks.13 +25-08-31 12:40:30 | D | - Calibrating transformer_blocks.13.attn.to_q.input, transformer_blocks.13.attn.to_k.input, transformer_blocks.13.attn.to_v.input +25-08-31 12:40:30 | D | - Calibrating transformer_blocks.13.attn.add_q_proj.input, transformer_blocks.13.attn.add_k_proj.input, transformer_blocks.13.attn.add_v_proj.input +25-08-31 12:40:30 | D | - Calibrating transformer_blocks.13.attn.to_out.0.input +25-08-31 12:40:31 | D | - Calibrating transformer_blocks.13.attn.to_add_out.input +25-08-31 12:40:31 | D | - Calibrating transformer_blocks.13.ff.net.0.proj.input +25-08-31 12:40:31 | D | - Calibrating transformer_blocks.13.ff.net.2.input +25-08-31 12:40:31 | D | - Calibrating transformer_blocks.13.ff_context.net.0.proj.input +25-08-31 12:40:31 | D | - Calibrating transformer_blocks.13.ff_context.net.2.input +25-08-31 12:40:32 | D | - Quantizing layer transformer_blocks.14 +25-08-31 12:40:32 | D | - Calibrating transformer_blocks.14.attn.to_q.input, transformer_blocks.14.attn.to_k.input, transformer_blocks.14.attn.to_v.input +25-08-31 12:40:32 | D | - Calibrating transformer_blocks.14.attn.add_q_proj.input, transformer_blocks.14.attn.add_k_proj.input, transformer_blocks.14.attn.add_v_proj.input +25-08-31 12:40:32 | D | - Calibrating transformer_blocks.14.attn.to_out.0.input +25-08-31 12:40:32 | D | - Calibrating transformer_blocks.14.attn.to_add_out.input +25-08-31 12:40:32 | D | - Calibrating transformer_blocks.14.ff.net.0.proj.input +25-08-31 12:40:33 | D | - Calibrating transformer_blocks.14.ff.net.2.input +25-08-31 12:40:33 | D | - Calibrating transformer_blocks.14.ff_context.net.0.proj.input +25-08-31 12:40:33 | D | - Calibrating transformer_blocks.14.ff_context.net.2.input +25-08-31 12:40:33 | D | - Quantizing layer transformer_blocks.15 +25-08-31 12:40:33 | D | - Calibrating transformer_blocks.15.attn.to_q.input, transformer_blocks.15.attn.to_k.input, transformer_blocks.15.attn.to_v.input +25-08-31 12:40:33 | D | - Calibrating transformer_blocks.15.attn.add_q_proj.input, transformer_blocks.15.attn.add_k_proj.input, transformer_blocks.15.attn.add_v_proj.input +25-08-31 12:40:33 | D | - Calibrating transformer_blocks.15.attn.to_out.0.input +25-08-31 12:40:34 | D | - Calibrating transformer_blocks.15.attn.to_add_out.input +25-08-31 12:40:34 | D | - Calibrating transformer_blocks.15.ff.net.0.proj.input +25-08-31 12:40:34 | D | - Calibrating transformer_blocks.15.ff.net.2.input +25-08-31 12:40:34 | D | - Calibrating transformer_blocks.15.ff_context.net.0.proj.input +25-08-31 12:40:34 | D | - Calibrating transformer_blocks.15.ff_context.net.2.input +25-08-31 12:40:35 | D | - Quantizing layer transformer_blocks.16 +25-08-31 12:40:35 | D | - Calibrating transformer_blocks.16.attn.to_q.input, transformer_blocks.16.attn.to_k.input, transformer_blocks.16.attn.to_v.input +25-08-31 12:40:35 | D | - Calibrating transformer_blocks.16.attn.add_q_proj.input, transformer_blocks.16.attn.add_k_proj.input, transformer_blocks.16.attn.add_v_proj.input +25-08-31 12:40:35 | D | - Calibrating transformer_blocks.16.attn.to_out.0.input +25-08-31 12:40:35 | D | - Calibrating transformer_blocks.16.attn.to_add_out.input +25-08-31 12:40:35 | D | - Calibrating transformer_blocks.16.ff.net.0.proj.input +25-08-31 12:40:36 | D | - Calibrating transformer_blocks.16.ff.net.2.input +25-08-31 12:40:36 | D | - Calibrating transformer_blocks.16.ff_context.net.0.proj.input +25-08-31 12:40:36 | D | - Calibrating transformer_blocks.16.ff_context.net.2.input +25-08-31 12:40:36 | D | - Quantizing layer transformer_blocks.17 +25-08-31 12:40:36 | D | - Calibrating transformer_blocks.17.attn.to_q.input, transformer_blocks.17.attn.to_k.input, transformer_blocks.17.attn.to_v.input +25-08-31 12:40:36 | D | - Calibrating transformer_blocks.17.attn.add_q_proj.input, transformer_blocks.17.attn.add_k_proj.input, transformer_blocks.17.attn.add_v_proj.input +25-08-31 12:40:36 | D | - Calibrating transformer_blocks.17.attn.to_out.0.input +25-08-31 12:40:37 | D | - Calibrating transformer_blocks.17.attn.to_add_out.input +25-08-31 12:40:37 | D | - Calibrating transformer_blocks.17.ff.net.0.proj.input +25-08-31 12:40:37 | D | - Calibrating transformer_blocks.17.ff.net.2.input +25-08-31 12:40:37 | D | - Calibrating transformer_blocks.17.ff_context.net.0.proj.input +25-08-31 12:40:37 | D | - Calibrating transformer_blocks.17.ff_context.net.2.input +25-08-31 12:40:38 | D | - Quantizing layer transformer_blocks.18 +25-08-31 12:40:38 | D | - Calibrating transformer_blocks.18.attn.to_q.input, transformer_blocks.18.attn.to_k.input, transformer_blocks.18.attn.to_v.input +25-08-31 12:40:38 | D | - Calibrating transformer_blocks.18.attn.add_q_proj.input, transformer_blocks.18.attn.add_k_proj.input, transformer_blocks.18.attn.add_v_proj.input +25-08-31 12:40:38 | D | - Calibrating transformer_blocks.18.attn.to_out.0.input +25-08-31 12:40:38 | D | - Calibrating transformer_blocks.18.attn.to_add_out.input +25-08-31 12:40:38 | D | - Calibrating transformer_blocks.18.ff.net.0.proj.input +25-08-31 12:40:38 | D | - Calibrating transformer_blocks.18.ff.net.2.input +25-08-31 12:40:39 | D | - Calibrating transformer_blocks.18.ff_context.net.0.proj.input +25-08-31 12:40:39 | D | - Calibrating transformer_blocks.18.ff_context.net.2.input +25-08-31 12:40:39 | D | - Quantizing layer single_transformer_blocks.0 +25-08-31 12:40:39 | D | - Calibrating single_transformer_blocks.0.attn.to_q.input, single_transformer_blocks.0.attn.to_k.input, single_transformer_blocks.0.attn.to_v.input, single_transformer_blocks.0.proj_mlp.input +25-08-31 12:40:39 | D | - Calibrating single_transformer_blocks.0.proj_out.linears.0.input +25-08-31 12:40:39 | D | - Calibrating single_transformer_blocks.0.proj_out.linears.1.input +25-08-31 12:40:40 | D | - Quantizing layer single_transformer_blocks.1 +25-08-31 12:40:40 | D | - Calibrating single_transformer_blocks.1.attn.to_q.input, single_transformer_blocks.1.attn.to_k.input, single_transformer_blocks.1.attn.to_v.input, single_transformer_blocks.1.proj_mlp.input +25-08-31 12:40:40 | D | - Calibrating single_transformer_blocks.1.proj_out.linears.0.input +25-08-31 12:40:40 | D | - Calibrating single_transformer_blocks.1.proj_out.linears.1.input +25-08-31 12:40:40 | D | - Quantizing layer single_transformer_blocks.2 +25-08-31 12:40:40 | D | - Calibrating single_transformer_blocks.2.attn.to_q.input, single_transformer_blocks.2.attn.to_k.input, single_transformer_blocks.2.attn.to_v.input, single_transformer_blocks.2.proj_mlp.input +25-08-31 12:40:40 | D | - Calibrating single_transformer_blocks.2.proj_out.linears.0.input +25-08-31 12:40:41 | D | - Calibrating single_transformer_blocks.2.proj_out.linears.1.input +25-08-31 12:40:41 | D | - Quantizing layer single_transformer_blocks.3 +25-08-31 12:40:41 | D | - Calibrating single_transformer_blocks.3.attn.to_q.input, single_transformer_blocks.3.attn.to_k.input, single_transformer_blocks.3.attn.to_v.input, single_transformer_blocks.3.proj_mlp.input +25-08-31 12:40:41 | D | - Calibrating single_transformer_blocks.3.proj_out.linears.0.input +25-08-31 12:40:41 | D | - Calibrating single_transformer_blocks.3.proj_out.linears.1.input +25-08-31 12:40:41 | D | - Quantizing layer single_transformer_blocks.4 +25-08-31 12:40:41 | D | - Calibrating single_transformer_blocks.4.attn.to_q.input, single_transformer_blocks.4.attn.to_k.input, single_transformer_blocks.4.attn.to_v.input, single_transformer_blocks.4.proj_mlp.input +25-08-31 12:40:41 | D | - Calibrating single_transformer_blocks.4.proj_out.linears.0.input +25-08-31 12:40:42 | D | - Calibrating single_transformer_blocks.4.proj_out.linears.1.input +25-08-31 12:40:42 | D | - Quantizing layer single_transformer_blocks.5 +25-08-31 12:40:42 | D | - Calibrating single_transformer_blocks.5.attn.to_q.input, single_transformer_blocks.5.attn.to_k.input, single_transformer_blocks.5.attn.to_v.input, single_transformer_blocks.5.proj_mlp.input +25-08-31 12:40:42 | D | - Calibrating single_transformer_blocks.5.proj_out.linears.0.input +25-08-31 12:40:42 | D | - Calibrating single_transformer_blocks.5.proj_out.linears.1.input +25-08-31 12:40:42 | D | - Quantizing layer single_transformer_blocks.6 +25-08-31 12:40:42 | D | - Calibrating single_transformer_blocks.6.attn.to_q.input, single_transformer_blocks.6.attn.to_k.input, single_transformer_blocks.6.attn.to_v.input, single_transformer_blocks.6.proj_mlp.input +25-08-31 12:40:43 | D | - Calibrating single_transformer_blocks.6.proj_out.linears.0.input +25-08-31 12:40:43 | D | - Calibrating single_transformer_blocks.6.proj_out.linears.1.input +25-08-31 12:40:43 | D | - Quantizing layer single_transformer_blocks.7 +25-08-31 12:40:43 | D | - Calibrating single_transformer_blocks.7.attn.to_q.input, single_transformer_blocks.7.attn.to_k.input, single_transformer_blocks.7.attn.to_v.input, single_transformer_blocks.7.proj_mlp.input +25-08-31 12:40:43 | D | - Calibrating single_transformer_blocks.7.proj_out.linears.0.input +25-08-31 12:40:43 | D | - Calibrating single_transformer_blocks.7.proj_out.linears.1.input +25-08-31 12:40:44 | D | - Quantizing layer single_transformer_blocks.8 +25-08-31 12:40:44 | D | - Calibrating single_transformer_blocks.8.attn.to_q.input, single_transformer_blocks.8.attn.to_k.input, single_transformer_blocks.8.attn.to_v.input, single_transformer_blocks.8.proj_mlp.input +25-08-31 12:40:44 | D | - Calibrating single_transformer_blocks.8.proj_out.linears.0.input +25-08-31 12:40:44 | D | - Calibrating single_transformer_blocks.8.proj_out.linears.1.input +25-08-31 12:40:44 | D | - Quantizing layer single_transformer_blocks.9 +25-08-31 12:40:44 | D | - Calibrating single_transformer_blocks.9.attn.to_q.input, single_transformer_blocks.9.attn.to_k.input, single_transformer_blocks.9.attn.to_v.input, single_transformer_blocks.9.proj_mlp.input +25-08-31 12:40:44 | D | - Calibrating single_transformer_blocks.9.proj_out.linears.0.input +25-08-31 12:40:44 | D | - Calibrating single_transformer_blocks.9.proj_out.linears.1.input +25-08-31 12:40:45 | D | - Quantizing layer single_transformer_blocks.10 +25-08-31 12:40:45 | D | - Calibrating single_transformer_blocks.10.attn.to_q.input, single_transformer_blocks.10.attn.to_k.input, single_transformer_blocks.10.attn.to_v.input, single_transformer_blocks.10.proj_mlp.input +25-08-31 12:40:45 | D | - Calibrating single_transformer_blocks.10.proj_out.linears.0.input +25-08-31 12:40:45 | D | - Calibrating single_transformer_blocks.10.proj_out.linears.1.input +25-08-31 12:40:45 | D | - Quantizing layer single_transformer_blocks.11 +25-08-31 12:40:45 | D | - Calibrating single_transformer_blocks.11.attn.to_q.input, single_transformer_blocks.11.attn.to_k.input, single_transformer_blocks.11.attn.to_v.input, single_transformer_blocks.11.proj_mlp.input +25-08-31 12:40:45 | D | - Calibrating single_transformer_blocks.11.proj_out.linears.0.input +25-08-31 12:40:46 | D | - Calibrating single_transformer_blocks.11.proj_out.linears.1.input +25-08-31 12:40:46 | D | - Quantizing layer single_transformer_blocks.12 +25-08-31 12:40:46 | D | - Calibrating single_transformer_blocks.12.attn.to_q.input, single_transformer_blocks.12.attn.to_k.input, single_transformer_blocks.12.attn.to_v.input, single_transformer_blocks.12.proj_mlp.input +25-08-31 12:40:46 | D | - Calibrating single_transformer_blocks.12.proj_out.linears.0.input +25-08-31 12:40:46 | D | - Calibrating single_transformer_blocks.12.proj_out.linears.1.input +25-08-31 12:40:46 | D | - Quantizing layer single_transformer_blocks.13 +25-08-31 12:40:46 | D | - Calibrating single_transformer_blocks.13.attn.to_q.input, single_transformer_blocks.13.attn.to_k.input, single_transformer_blocks.13.attn.to_v.input, single_transformer_blocks.13.proj_mlp.input +25-08-31 12:40:46 | D | - Calibrating single_transformer_blocks.13.proj_out.linears.0.input +25-08-31 12:40:47 | D | - Calibrating single_transformer_blocks.13.proj_out.linears.1.input +25-08-31 12:40:47 | D | - Quantizing layer single_transformer_blocks.14 +25-08-31 12:40:47 | D | - Calibrating single_transformer_blocks.14.attn.to_q.input, single_transformer_blocks.14.attn.to_k.input, single_transformer_blocks.14.attn.to_v.input, single_transformer_blocks.14.proj_mlp.input +25-08-31 12:40:47 | D | - Calibrating single_transformer_blocks.14.proj_out.linears.0.input +25-08-31 12:40:47 | D | - Calibrating single_transformer_blocks.14.proj_out.linears.1.input +25-08-31 12:40:47 | D | - Quantizing layer single_transformer_blocks.15 +25-08-31 12:40:47 | D | - Calibrating single_transformer_blocks.15.attn.to_q.input, single_transformer_blocks.15.attn.to_k.input, single_transformer_blocks.15.attn.to_v.input, single_transformer_blocks.15.proj_mlp.input +25-08-31 12:40:48 | D | - Calibrating single_transformer_blocks.15.proj_out.linears.0.input +25-08-31 12:40:48 | D | - Calibrating single_transformer_blocks.15.proj_out.linears.1.input +25-08-31 12:40:48 | D | - Quantizing layer single_transformer_blocks.16 +25-08-31 12:40:48 | D | - Calibrating single_transformer_blocks.16.attn.to_q.input, single_transformer_blocks.16.attn.to_k.input, single_transformer_blocks.16.attn.to_v.input, single_transformer_blocks.16.proj_mlp.input +25-08-31 12:40:48 | D | - Calibrating single_transformer_blocks.16.proj_out.linears.0.input +25-08-31 12:40:48 | D | - Calibrating single_transformer_blocks.16.proj_out.linears.1.input +25-08-31 12:40:49 | D | - Quantizing layer single_transformer_blocks.17 +25-08-31 12:40:49 | D | - Calibrating single_transformer_blocks.17.attn.to_q.input, single_transformer_blocks.17.attn.to_k.input, single_transformer_blocks.17.attn.to_v.input, single_transformer_blocks.17.proj_mlp.input +25-08-31 12:40:49 | D | - Calibrating single_transformer_blocks.17.proj_out.linears.0.input +25-08-31 12:40:49 | D | - Calibrating single_transformer_blocks.17.proj_out.linears.1.input +25-08-31 12:40:49 | D | - Quantizing layer single_transformer_blocks.18 +25-08-31 12:40:49 | D | - Calibrating single_transformer_blocks.18.attn.to_q.input, single_transformer_blocks.18.attn.to_k.input, single_transformer_blocks.18.attn.to_v.input, single_transformer_blocks.18.proj_mlp.input +25-08-31 12:40:49 | D | - Calibrating single_transformer_blocks.18.proj_out.linears.0.input +25-08-31 12:40:49 | D | - Calibrating single_transformer_blocks.18.proj_out.linears.1.input +25-08-31 12:40:50 | D | - Quantizing layer single_transformer_blocks.19 +25-08-31 12:40:50 | D | - Calibrating single_transformer_blocks.19.attn.to_q.input, single_transformer_blocks.19.attn.to_k.input, single_transformer_blocks.19.attn.to_v.input, single_transformer_blocks.19.proj_mlp.input +25-08-31 12:40:50 | D | - Calibrating single_transformer_blocks.19.proj_out.linears.0.input +25-08-31 12:40:50 | D | - Calibrating single_transformer_blocks.19.proj_out.linears.1.input +25-08-31 12:40:50 | D | - Quantizing layer single_transformer_blocks.20 +25-08-31 12:40:50 | D | - Calibrating single_transformer_blocks.20.attn.to_q.input, single_transformer_blocks.20.attn.to_k.input, single_transformer_blocks.20.attn.to_v.input, single_transformer_blocks.20.proj_mlp.input +25-08-31 12:40:50 | D | - Calibrating single_transformer_blocks.20.proj_out.linears.0.input +25-08-31 12:40:51 | D | - Calibrating single_transformer_blocks.20.proj_out.linears.1.input +25-08-31 12:40:51 | D | - Quantizing layer single_transformer_blocks.21 +25-08-31 12:40:51 | D | - Calibrating single_transformer_blocks.21.attn.to_q.input, single_transformer_blocks.21.attn.to_k.input, single_transformer_blocks.21.attn.to_v.input, single_transformer_blocks.21.proj_mlp.input +25-08-31 12:40:51 | D | - Calibrating single_transformer_blocks.21.proj_out.linears.0.input +25-08-31 12:40:51 | D | - Calibrating single_transformer_blocks.21.proj_out.linears.1.input +25-08-31 12:40:51 | D | - Quantizing layer single_transformer_blocks.22 +25-08-31 12:40:51 | D | - Calibrating single_transformer_blocks.22.attn.to_q.input, single_transformer_blocks.22.attn.to_k.input, single_transformer_blocks.22.attn.to_v.input, single_transformer_blocks.22.proj_mlp.input +25-08-31 12:40:52 | D | - Calibrating single_transformer_blocks.22.proj_out.linears.0.input +25-08-31 12:40:52 | D | - Calibrating single_transformer_blocks.22.proj_out.linears.1.input +25-08-31 12:40:52 | D | - Quantizing layer single_transformer_blocks.23 +25-08-31 12:40:52 | D | - Calibrating single_transformer_blocks.23.attn.to_q.input, single_transformer_blocks.23.attn.to_k.input, single_transformer_blocks.23.attn.to_v.input, single_transformer_blocks.23.proj_mlp.input +25-08-31 12:40:52 | D | - Calibrating single_transformer_blocks.23.proj_out.linears.0.input +25-08-31 12:40:52 | D | - Calibrating single_transformer_blocks.23.proj_out.linears.1.input +25-08-31 12:40:52 | D | - Quantizing layer single_transformer_blocks.24 +25-08-31 12:40:52 | D | - Calibrating single_transformer_blocks.24.attn.to_q.input, single_transformer_blocks.24.attn.to_k.input, single_transformer_blocks.24.attn.to_v.input, single_transformer_blocks.24.proj_mlp.input +25-08-31 12:40:53 | D | - Calibrating single_transformer_blocks.24.proj_out.linears.0.input +25-08-31 12:40:53 | D | - Calibrating single_transformer_blocks.24.proj_out.linears.1.input +25-08-31 12:40:53 | D | - Quantizing layer single_transformer_blocks.25 +25-08-31 12:40:53 | D | - Calibrating single_transformer_blocks.25.attn.to_q.input, single_transformer_blocks.25.attn.to_k.input, single_transformer_blocks.25.attn.to_v.input, single_transformer_blocks.25.proj_mlp.input +25-08-31 12:40:53 | D | - Calibrating single_transformer_blocks.25.proj_out.linears.0.input +25-08-31 12:40:53 | D | - Calibrating single_transformer_blocks.25.proj_out.linears.1.input +25-08-31 12:40:54 | D | - Quantizing layer single_transformer_blocks.26 +25-08-31 12:40:54 | D | - Calibrating single_transformer_blocks.26.attn.to_q.input, single_transformer_blocks.26.attn.to_k.input, single_transformer_blocks.26.attn.to_v.input, single_transformer_blocks.26.proj_mlp.input +25-08-31 12:40:54 | D | - Calibrating single_transformer_blocks.26.proj_out.linears.0.input +25-08-31 12:40:54 | D | - Calibrating single_transformer_blocks.26.proj_out.linears.1.input +25-08-31 12:40:54 | D | - Quantizing layer single_transformer_blocks.27 +25-08-31 12:40:54 | D | - Calibrating single_transformer_blocks.27.attn.to_q.input, single_transformer_blocks.27.attn.to_k.input, single_transformer_blocks.27.attn.to_v.input, single_transformer_blocks.27.proj_mlp.input +25-08-31 12:40:54 | D | - Calibrating single_transformer_blocks.27.proj_out.linears.0.input +25-08-31 12:40:54 | D | - Calibrating single_transformer_blocks.27.proj_out.linears.1.input +25-08-31 12:40:55 | D | - Quantizing layer single_transformer_blocks.28 +25-08-31 12:40:55 | D | - Calibrating single_transformer_blocks.28.attn.to_q.input, single_transformer_blocks.28.attn.to_k.input, single_transformer_blocks.28.attn.to_v.input, single_transformer_blocks.28.proj_mlp.input +25-08-31 12:40:55 | D | - Calibrating single_transformer_blocks.28.proj_out.linears.0.input +25-08-31 12:40:55 | D | - Calibrating single_transformer_blocks.28.proj_out.linears.1.input +25-08-31 12:40:55 | D | - Quantizing layer single_transformer_blocks.29 +25-08-31 12:40:55 | D | - Calibrating single_transformer_blocks.29.attn.to_q.input, single_transformer_blocks.29.attn.to_k.input, single_transformer_blocks.29.attn.to_v.input, single_transformer_blocks.29.proj_mlp.input +25-08-31 12:40:55 | D | - Calibrating single_transformer_blocks.29.proj_out.linears.0.input +25-08-31 12:40:56 | D | - Calibrating single_transformer_blocks.29.proj_out.linears.1.input +25-08-31 12:40:56 | D | - Quantizing layer single_transformer_blocks.30 +25-08-31 12:40:56 | D | - Calibrating single_transformer_blocks.30.attn.to_q.input, single_transformer_blocks.30.attn.to_k.input, single_transformer_blocks.30.attn.to_v.input, single_transformer_blocks.30.proj_mlp.input +25-08-31 12:40:56 | D | - Calibrating single_transformer_blocks.30.proj_out.linears.0.input +25-08-31 12:40:56 | D | - Calibrating single_transformer_blocks.30.proj_out.linears.1.input +25-08-31 12:40:56 | D | - Quantizing layer single_transformer_blocks.31 +25-08-31 12:40:56 | D | - Calibrating single_transformer_blocks.31.attn.to_q.input, single_transformer_blocks.31.attn.to_k.input, single_transformer_blocks.31.attn.to_v.input, single_transformer_blocks.31.proj_mlp.input +25-08-31 12:40:57 | D | - Calibrating single_transformer_blocks.31.proj_out.linears.0.input +25-08-31 12:40:57 | D | - Calibrating single_transformer_blocks.31.proj_out.linears.1.input +25-08-31 12:40:57 | D | - Quantizing layer single_transformer_blocks.32 +25-08-31 12:40:57 | D | - Calibrating single_transformer_blocks.32.attn.to_q.input, single_transformer_blocks.32.attn.to_k.input, single_transformer_blocks.32.attn.to_v.input, single_transformer_blocks.32.proj_mlp.input +25-08-31 12:40:57 | D | - Calibrating single_transformer_blocks.32.proj_out.linears.0.input +25-08-31 12:40:57 | D | - Calibrating single_transformer_blocks.32.proj_out.linears.1.input +25-08-31 12:40:57 | D | - Quantizing layer single_transformer_blocks.33 +25-08-31 12:40:57 | D | - Calibrating single_transformer_blocks.33.attn.to_q.input, single_transformer_blocks.33.attn.to_k.input, single_transformer_blocks.33.attn.to_v.input, single_transformer_blocks.33.proj_mlp.input +25-08-31 12:40:58 | D | - Calibrating single_transformer_blocks.33.proj_out.linears.0.input +25-08-31 12:40:58 | D | - Calibrating single_transformer_blocks.33.proj_out.linears.1.input +25-08-31 12:40:58 | D | - Quantizing layer single_transformer_blocks.34 +25-08-31 12:40:58 | D | - Calibrating single_transformer_blocks.34.attn.to_q.input, single_transformer_blocks.34.attn.to_k.input, single_transformer_blocks.34.attn.to_v.input, single_transformer_blocks.34.proj_mlp.input +25-08-31 12:40:58 | D | - Calibrating single_transformer_blocks.34.proj_out.linears.0.input +25-08-31 12:40:58 | D | - Calibrating single_transformer_blocks.34.proj_out.linears.1.input +25-08-31 12:40:59 | D | - Quantizing layer single_transformer_blocks.35 +25-08-31 12:40:59 | D | - Calibrating single_transformer_blocks.35.attn.to_q.input, single_transformer_blocks.35.attn.to_k.input, single_transformer_blocks.35.attn.to_v.input, single_transformer_blocks.35.proj_mlp.input +25-08-31 12:40:59 | D | - Calibrating single_transformer_blocks.35.proj_out.linears.0.input +25-08-31 12:40:59 | D | - Calibrating single_transformer_blocks.35.proj_out.linears.1.input +25-08-31 12:40:59 | D | - Quantizing layer single_transformer_blocks.36 +25-08-31 12:40:59 | D | - Calibrating single_transformer_blocks.36.attn.to_q.input, single_transformer_blocks.36.attn.to_k.input, single_transformer_blocks.36.attn.to_v.input, single_transformer_blocks.36.proj_mlp.input +25-08-31 12:40:59 | D | - Calibrating single_transformer_blocks.36.proj_out.linears.0.input +25-08-31 12:41:00 | D | - Calibrating single_transformer_blocks.36.proj_out.linears.1.input +25-08-31 12:41:00 | D | - Quantizing layer single_transformer_blocks.37 +25-08-31 12:41:00 | D | - Calibrating single_transformer_blocks.37.attn.to_q.input, single_transformer_blocks.37.attn.to_k.input, single_transformer_blocks.37.attn.to_v.input, single_transformer_blocks.37.proj_mlp.input +25-08-31 12:41:00 | D | - Calibrating single_transformer_blocks.37.proj_out.linears.0.input +25-08-31 12:41:00 | D | - Calibrating single_transformer_blocks.37.proj_out.linears.1.input +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.0.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.0.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.0.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.0.attn.add_q_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.0.attn.add_k_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.0.attn.add_v_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.0.attn.to_out.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.0.attn.to_add_out (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.0.ff.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.0.ff.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.0.ff_context.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.0.ff_context.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.1.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.1.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.1.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.1.attn.add_q_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.1.attn.add_k_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.1.attn.add_v_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.1.attn.to_out.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.1.attn.to_add_out (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.1.ff.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.1.ff.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.1.ff_context.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.1.ff_context.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.2.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.2.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.2.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.2.attn.add_q_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.2.attn.add_k_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.2.attn.add_v_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.2.attn.to_out.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.2.attn.to_add_out (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.2.ff.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.2.ff.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.2.ff_context.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.2.ff_context.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.3.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.3.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.3.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.3.attn.add_q_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.3.attn.add_k_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.3.attn.add_v_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.3.attn.to_out.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.3.attn.to_add_out (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.3.ff.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.3.ff.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.3.ff_context.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.3.ff_context.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.4.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.4.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.4.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.4.attn.add_q_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.4.attn.add_k_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.4.attn.add_v_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.4.attn.to_out.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.4.attn.to_add_out (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.4.ff.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.4.ff.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.4.ff_context.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.4.ff_context.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.5.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.5.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.5.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.5.attn.add_q_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.5.attn.add_k_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.5.attn.add_v_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.5.attn.to_out.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.5.attn.to_add_out (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.5.ff.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.5.ff.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.5.ff_context.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.5.ff_context.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.6.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.6.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.6.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.6.attn.add_q_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.6.attn.add_k_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.6.attn.add_v_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.6.attn.to_out.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.6.attn.to_add_out (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.6.ff.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.6.ff.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.6.ff_context.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.6.ff_context.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.7.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.7.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.7.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.7.attn.add_q_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.7.attn.add_k_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.7.attn.add_v_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.7.attn.to_out.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.7.attn.to_add_out (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.7.ff.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.7.ff.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.7.ff_context.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.7.ff_context.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.8.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.8.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.8.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.8.attn.add_q_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.8.attn.add_k_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.8.attn.add_v_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.8.attn.to_out.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.8.attn.to_add_out (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.8.ff.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.8.ff.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.8.ff_context.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.8.ff_context.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.9.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.9.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.9.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.9.attn.add_q_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.9.attn.add_k_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.9.attn.add_v_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.9.attn.to_out.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.9.attn.to_add_out (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.9.ff.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.9.ff.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.9.ff_context.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.9.ff_context.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.10.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.10.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.10.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.10.attn.add_q_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.10.attn.add_k_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.10.attn.add_v_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.10.attn.to_out.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.10.attn.to_add_out (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.10.ff.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.10.ff.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.10.ff_context.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.10.ff_context.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.11.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.11.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.11.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.11.attn.add_q_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.11.attn.add_k_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.11.attn.add_v_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.11.attn.to_out.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.11.attn.to_add_out (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.11.ff.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.11.ff.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.11.ff_context.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.11.ff_context.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.12.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.12.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.12.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.12.attn.add_q_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.12.attn.add_k_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.12.attn.add_v_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.12.attn.to_out.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.12.attn.to_add_out (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.12.ff.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.12.ff.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.12.ff_context.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.12.ff_context.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.13.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.13.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.13.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.13.attn.add_q_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.13.attn.add_k_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.13.attn.add_v_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.13.attn.to_out.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.13.attn.to_add_out (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.13.ff.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.13.ff.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.13.ff_context.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.13.ff_context.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.14.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.14.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.14.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.14.attn.add_q_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.14.attn.add_k_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.14.attn.add_v_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.14.attn.to_out.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.14.attn.to_add_out (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.14.ff.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.14.ff.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.14.ff_context.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.14.ff_context.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.15.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.15.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.15.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.15.attn.add_q_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.15.attn.add_k_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.15.attn.add_v_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.15.attn.to_out.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.15.attn.to_add_out (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.15.ff.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.15.ff.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.15.ff_context.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.15.ff_context.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.16.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.16.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.16.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.16.attn.add_q_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.16.attn.add_k_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.16.attn.add_v_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.16.attn.to_out.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.16.attn.to_add_out (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.16.ff.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.16.ff.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.16.ff_context.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.16.ff_context.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.17.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.17.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.17.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.17.attn.add_q_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.17.attn.add_k_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.17.attn.add_v_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.17.attn.to_out.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.17.attn.to_add_out (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.17.ff.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.17.ff.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.17.ff_context.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.17.ff_context.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.18.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.18.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.18.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.18.attn.add_q_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.18.attn.add_k_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.18.attn.add_v_proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.18.attn.to_out.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.18.attn.to_add_out (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.18.ff.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.18.ff.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.18.ff_context.net.0.proj (inputs) +25-08-31 12:41:00 | D | - Quantizing transformer_blocks.18.ff_context.net.2 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.0.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.0.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.0.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.0.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.0.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.0.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.1.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.1.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.1.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.1.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.1.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.1.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.2.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.2.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.2.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.2.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.2.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.2.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.3.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.3.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.3.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.3.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.3.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.3.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.4.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.4.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.4.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.4.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.4.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.4.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.5.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.5.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.5.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.5.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.5.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.5.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.6.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.6.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.6.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.6.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.6.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.6.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.7.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.7.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.7.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.7.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.7.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.7.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.8.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.8.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.8.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.8.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.8.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.8.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.9.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.9.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.9.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.9.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.9.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.9.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.10.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.10.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.10.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.10.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.10.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.10.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.11.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.11.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.11.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.11.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.11.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.11.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.12.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.12.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.12.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.12.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.12.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.12.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.13.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.13.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.13.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.13.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.13.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.13.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.14.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.14.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.14.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.14.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.14.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.14.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.15.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.15.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.15.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.15.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.15.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.15.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.16.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.16.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.16.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.16.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.16.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.16.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.17.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.17.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.17.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.17.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.17.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.17.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.18.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.18.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.18.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.18.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.18.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.18.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.19.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.19.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.19.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.19.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.19.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.19.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.20.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.20.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.20.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.20.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.20.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.20.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.21.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.21.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.21.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.21.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.21.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.21.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.22.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.22.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.22.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.22.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.22.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.22.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.23.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.23.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.23.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.23.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.23.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.23.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.24.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.24.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.24.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.24.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.24.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.24.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.25.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.25.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.25.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.25.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.25.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.25.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.26.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.26.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.26.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.26.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.26.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.26.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.27.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.27.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.27.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.27.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.27.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.27.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.28.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.28.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.28.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.28.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.28.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.28.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.29.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.29.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.29.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.29.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.29.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.29.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.30.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.30.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.30.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.30.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.30.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.30.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.31.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.31.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.31.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.31.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.31.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.31.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.32.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.32.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.32.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.32.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.32.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.32.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.33.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.33.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.33.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.33.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.33.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.33.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.34.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.34.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.34.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.34.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.34.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.34.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.35.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.35.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.35.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.35.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.35.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.35.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.36.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.36.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.36.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.36.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.36.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.36.proj_out.linears.1 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.37.attn.to_q (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.37.attn.to_k (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.37.attn.to_v (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.37.proj_out.linears.0 (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.37.proj_mlp (inputs) +25-08-31 12:41:00 | D | - Quantizing single_transformer_blocks.37.proj_out.linears.1 (inputs) diff --git a/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/config-250828.070127.yaml b/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/config-250828.070127.yaml new file mode 100644 index 0000000000000000000000000000000000000000..57c1495ea9e667983c17e8d5808332b59d58906b --- /dev/null +++ b/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/config-250828.070127.yaml @@ -0,0 +1,236 @@ +enable_cache: true +cache: + root: runs +output: + root: runs + dirname: shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000 + job: run +pipeline: + name: flux.1-dev-ghibli + path: /home/pingzhi/checkpoints/fused_flux_dev_studio_ghibli_bf16 + dtype: torch.bfloat16 + device: cuda + shift_activations: true + enable_lora: false + task: text-to-image +eval: + protocol: fmeuler50-g3.5 + num_gpus: 1 + batch_size: 1 + batch_size_per_gpu: 1 + height: null + width: null + clean_caption: null + num_steps: 50 + guidance_scale: 3.5 + num_samples: 5000 + benchmarks: + - MJHQ + - DCI + gt_metrics: + - clip_iqa + - clip_score + - image_reward + - fid + ref_metrics: + - psnr + - lpips + - ssim + - fid + gen_root: '{output}/{job}' + ref_root: /data/pingzhi/deepcompressor/examples/diffusion/baselines/torch.bfloat16/flux.1-dev-ghibli/fmeuler50-g3.5 + gt_stats_root: benchmarks/stats + control_root: benchmarks + chunk_start: 0 + chunk_step: 1 + chunk_only: false +quant: + wgts: + dtype: sint4 + zero_point: null + group_shapes: + - - 1 + - 64 + - 1 + - 1 + - 1 + scale_dtypes: + - null + enable_kernel_gptq: false + enable_low_rank: true + low_rank: + rank: 32 + exclusive: false + compensate: false + degree: 2 + objective: OutputsError + strategy: Manual + sample_batch_size: 16 + sample_size: -1 + outputs_device: cpu + num_iters: 100 + early_stop: true + skips: + - down_resblock_conv + - down_transformer_add_norm + - down_transformer_norm + - mid_resblock_conv + - mid_transformer_add_norm + - mid_transformer_norm + - transformer_add_norm + - transformer_norm + - up_resblock_conv + - up_transformer_add_norm + - up_transformer_norm + enable_calib_range: true + calib_range: + degree: 2 + objective: OutputsError + strategy: Manual + granularity: Layer + element_batch_size: -1 + sample_batch_size: 16 + element_size: -1 + sample_size: -1 + pre_reshape: true + outputs_device: cpu + ratio: 1.0 + max_shrink: 0.2 + max_expand: 1.0 + num_grids: 80 + allow_scale: false + skips: [] + skips: + - down_resblock_shortcut + - down_resblock_time_proj + - down_sample + - down_transformer_proj_in + - down_transformer_proj_out + - input_embed + - mid_resblock_shortcut + - mid_resblock_time_proj + - mid_transformer_proj_in + - mid_transformer_proj_out + - output_embed + - text_embed + - time_embed + - up_resblock_shortcut + - up_resblock_time_proj + - up_sample + - up_transformer_proj_in + - up_transformer_proj_out + ipts: + dtype: sint4 + zero_point: null + group_shapes: + - - 1 + - 64 + - 1 + - 1 + - 1 + scale_dtypes: + - null + static: false + enable_calib_range: false + skips: + - down_resblock_shortcut + - down_resblock_time_proj + - down_sample + - down_transformer_add_norm + - down_transformer_norm + - down_transformer_proj_in + - down_transformer_proj_out + - input_embed + - mid_resblock_shortcut + - mid_resblock_time_proj + - mid_transformer_add_norm + - mid_transformer_norm + - mid_transformer_proj_in + - mid_transformer_proj_out + - output_embed + - text_embed + - time_embed + - transformer_add_norm + - transformer_norm + - up_resblock_shortcut + - up_resblock_time_proj + - up_sample + - up_transformer_add_norm + - up_transformer_norm + - up_transformer_proj_in + - up_transformer_proj_out + allow_unsigned: true + opts: + dtype: null + zero_point: null + group_shapes: + - - -1 + - -1 + - -1 + scale_dtypes: + - null + static: false + enable_calib_range: false + skips: [] + allow_unsigned: false + enable_extra_wgts: false + calib: + data: qdiff + num_samples: 128 + batch_size: 16 + path: /data/pingzhi/deepcompressor/examples/diffusion/datasets/torch.bfloat16/flux.1-dev-ghibli/fmeuler50-g3.5/qdiff/s128 + num_workers: 8 + enable_rotation: false + enable_smooth: true + smooth: + enable_proj: true + proj: + degree: 2 + objective: OutputsError + strategy: GridSearch + granularity: Layer + element_batch_size: -1 + sample_batch_size: 16 + element_size: -1 + sample_size: -1 + pre_reshape: true + outputs_device: cpu + fuse_when_possible: false + allow_a_quant: true + allow_b_quant: true + spans: + - - AbsMax + - AbsMax + alpha: 0.5 + beta: -2 + num_grids: 20 + allow_low_rank: true + skips: + - down_resblock_conv + - down_transformer_add_norm + - down_transformer_norm + - mid_resblock_conv + - mid_transformer_add_norm + - mid_transformer_norm + - transformer_add_norm + - transformer_norm + - up_resblock_conv + - up_transformer_add_norm + - up_transformer_norm + enable_attn: false + develop_dtype: torch.float32 +enable_text: false +text_cache: + root: '' + path: + rotation: '' + reorder: '' + smooth: '' + wgts: '' + acts: '' +seed: 12345 +skip_gen: true +skip_eval: true +load_from: '' +save_model: 'true' +copy_on_save: false diff --git a/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/branch.pt b/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/branch.pt new file mode 120000 index 0000000000000000000000000000000000000000..dd610b2d0de928229528351d5bfac4f311423884 --- /dev/null +++ b/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/branch.pt @@ -0,0 +1 @@ +../../../../../../../../../cache/quant/qdiff.128/branch/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/w.skip.[e+rs+rtp+s+tpi+tpo]-x.skip.[e+rs+rtp+s+tan+tn+tpi+tpo]-y.skip.[]/shift/lowrank.r32/smooth.proj.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.proj.[a.AbsMax.b.AbsMax]/smooth.proj.g20.bn2.lr.nf/smooth.proj.skip.[rc+tan+tn]/lowrank.OutputsError.Manual.Layer.d2.en1.sn1/lowrank.i100.r32.earlystop/lowrank.skip.[rc+tan+tn]/flux.1-dev-ghibli.pt \ No newline at end of file diff --git a/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/model.pt b/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/model.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3f147205fa4bb7f5f4d5a952d69defa4468c856 --- /dev/null +++ b/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/model.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58385172e212ea5f720f78eb346eeff8554f6d8b778cd88ab13b2a0242cc7d8b +size 23803329729 diff --git a/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/scale.pt b/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/scale.pt new file mode 100644 index 0000000000000000000000000000000000000000..c8572b3092bbfe62dcae0015b4f53565e6942578 --- /dev/null +++ b/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/scale.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa78f32c795f98d625d13a237b06f44d50e067327633bbadd7f5fbe6096fb45f +size 739952787 diff --git a/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/smooth.pt b/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/smooth.pt new file mode 120000 index 0000000000000000000000000000000000000000..bb20fe9000f9d87082de1eb1e2ee0ecb5719637a --- /dev/null +++ b/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/smooth.pt @@ -0,0 +1 @@ +../../../../../../../../../cache/quant/qdiff.128/smooth/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/w.skip.[e+rs+rtp+s+tpi+tpo]-x.skip.[e+rs+rtp+s+tan+tn+tpi+tpo]-y.skip.[]/shift/lowrank.r32/smooth.proj.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.proj.[a.AbsMax.b.AbsMax]/smooth.proj.g20.bn2.lr.nf/smooth.proj.skip.[rc+tan+tn]/flux.1-dev-ghibli.pt \ No newline at end of file diff --git a/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/wgts.pt b/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/wgts.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e8470a79de8df8731813635d5339f17afbca949 --- /dev/null +++ b/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/model/wgts.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0780a44cf225507e60db7557da5e1665de9bcc7034b33dff598af18bbb06bda7 +size 40347 diff --git a/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/run-250828.070127.log b/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/run-250828.070127.log new file mode 100644 index 0000000000000000000000000000000000000000..74fb2802c7d6072511ee9ae1d299c5c5b34dc32d --- /dev/null +++ b/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127/run-250828.070127.log @@ -0,0 +1,20621 @@ +25-08-28 07:01:27 | I | === Configurations === +25-08-28 07:01:27 | I | DiffusionPtqRunConfig( +25-08-28 07:01:27 | I | cache=DiffusionPtqCacheConfig( +25-08-28 07:01:27 | I | root=runs, +25-08-28 07:01:27 | I | dirpath=DiffusionQuantCacheConfig(smooth='runs/diffusion/cache/quant/qdiff.128/smooth/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/w.skip.[e+rs+rtp+s+tpi+tpo]-x.skip.[e+rs+rtp+s+tan+tn+tpi+tpo]-y.skip.[]/shift/lowrank.r32/smooth.proj.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.proj.[a.AbsMax.b.AbsMax]/smooth.proj.g20.bn2.lr.nf/smooth.proj.skip.[rc+tan+tn]', branch='runs/diffusion/cache/quant/qdiff.128/branch/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/w.skip.[e+rs+rtp+s+tpi+tpo]-x.skip.[e+rs+rtp+s+tan+tn+tpi+tpo]-y.skip.[]/shift/lowrank.r32/smooth.proj.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.proj.[a.AbsMax.b.AbsMax]/smooth.proj.g20.bn2.lr.nf/smooth.proj.skip.[rc+tan+tn]/lowrank.OutputsError.Manual.Layer.d2.en1.sn1/lowrank.i100.r32.earlystop/lowrank.skip.[rc+tan+tn]', wgts='', acts=''), +25-08-28 07:01:27 | I | path=DiffusionQuantCacheConfig(smooth='runs/diffusion/cache/quant/qdiff.128/smooth/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/w.skip.[e+rs+rtp+s+tpi+tpo]-x.skip.[e+rs+rtp+s+tan+tn+tpi+tpo]-y.skip.[]/shift/lowrank.r32/smooth.proj.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.proj.[a.AbsMax.b.AbsMax]/smooth.proj.g20.bn2.lr.nf/smooth.proj.skip.[rc+tan+tn]/flux.1-dev-ghibli.pt', branch='runs/diffusion/cache/quant/qdiff.128/branch/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/w.skip.[e+rs+rtp+s+tpi+tpo]-x.skip.[e+rs+rtp+s+tan+tn+tpi+tpo]-y.skip.[]/shift/lowrank.r32/smooth.proj.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.proj.[a.AbsMax.b.AbsMax]/smooth.proj.g20.bn2.lr.nf/smooth.proj.skip.[rc+tan+tn]/lowrank.OutputsError.Manual.Layer.d2.en1.sn1/lowrank.i100.r32.earlystop/lowrank.skip.[rc+tan+tn]/flux.1-dev-ghibli.pt', wgts='', acts='')), +25-08-28 07:01:27 | I | output=OutputConfig( +25-08-28 07:01:27 | I | root=runs, +25-08-28 07:01:27 | I | dirname=shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000, +25-08-28 07:01:27 | I | job=run, +25-08-28 07:01:27 | I | dirpath=runs/diffusion/flux.1/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000, +25-08-28 07:01:27 | I | timestamp=250828.070127), +25-08-28 07:01:27 | I | pipeline=DiffusionPipelineConfig( +25-08-28 07:01:27 | I | name=flux.1-dev-ghibli, +25-08-28 07:01:27 | I | path=/home/pingzhi/checkpoints/fused_flux_dev_studio_ghibli_bf16, +25-08-28 07:01:27 | I | dtype=torch.bfloat16, +25-08-28 07:01:27 | I | device=cuda, +25-08-28 07:01:27 | I | shift_activations=True, +25-08-28 07:01:27 | I | lora=None, +25-08-28 07:01:27 | I | family=flux.1, +25-08-28 07:01:27 | I | task=text-to-image), +25-08-28 07:01:27 | I | eval=DiffusionEvalConfig( +25-08-28 07:01:27 | I | protocol=fmeuler50-g3.5, +25-08-28 07:01:27 | I | num_gpus=1, +25-08-28 07:01:27 | I | batch_size=1, +25-08-28 07:01:27 | I | batch_size_per_gpu=1, +25-08-28 07:01:27 | I | height=None, +25-08-28 07:01:27 | I | width=None, +25-08-28 07:01:27 | I | clean_caption=None, +25-08-28 07:01:27 | I | num_steps=50, +25-08-28 07:01:27 | I | guidance_scale=3.5, +25-08-28 07:01:27 | I | num_samples=5000, +25-08-28 07:01:27 | I | benchmarks=['MJHQ', 'DCI'], +25-08-28 07:01:27 | I | gt_metrics=['clip_iqa', 'clip_score', 'image_reward', 'fid'], +25-08-28 07:01:27 | I | ref_metrics=['psnr', 'lpips', 'ssim', 'fid'], +25-08-28 07:01:27 | I | gen_root={output}/{job}, +25-08-28 07:01:27 | I | ref_root=/data/pingzhi/deepcompressor/examples/diffusion/baselines/torch.bfloat16/flux.1-dev-ghibli/fmeuler50-g3.5, +25-08-28 07:01:27 | I | gt_stats_root=benchmarks/stats, +25-08-28 07:01:27 | I | control_root=benchmarks, +25-08-28 07:01:27 | I | chunk_start=0, +25-08-28 07:01:27 | I | chunk_step=1, +25-08-28 07:01:27 | I | chunk_only=False), +25-08-28 07:01:27 | I | quant=DiffusionQuantConfig( +25-08-28 07:01:27 | I | wgts=DiffusionWeightQuantizerConfig( +25-08-28 07:01:27 | I | dtype=sint4, +25-08-28 07:01:27 | I | zero_point=None, +25-08-28 07:01:27 | I | group_shapes=((1, 64, 1, 1, 1),), +25-08-28 07:01:27 | I | scale_dtypes=(None,), +25-08-28 07:01:27 | I | static=True, +25-08-28 07:01:27 | I | kernel_gptq=None, +25-08-28 07:01:27 | I | low_rank=SkipBasedQuantLowRankCalibConfig( +25-08-28 07:01:27 | I | rank=32, +25-08-28 07:01:27 | I | exclusive=False, +25-08-28 07:01:27 | I | compensate=False, +25-08-28 07:01:27 | I | degree=2, +25-08-28 07:01:27 | I | objective=SearchBasedCalibObjective.OutputsError, +25-08-28 07:01:27 | I | strategy=SearchBasedCalibStrategy.Manual, +25-08-28 07:01:27 | I | granularity=SearchBasedCalibGranularity.Layer, +25-08-28 07:01:27 | I | element_batch_size=-1, +25-08-28 07:01:27 | I | sample_batch_size=16, +25-08-28 07:01:27 | I | element_size=-1, +25-08-28 07:01:27 | I | sample_size=-1, +25-08-28 07:01:27 | I | pre_reshape=True, +25-08-28 07:01:27 | I | outputs_device=cpu, +25-08-28 07:01:27 | I | num_iters=100, +25-08-28 07:01:27 | I | early_stop=True, +25-08-28 07:01:27 | I | skips=['down_resblock_conv', 'down_transformer_add_norm', 'down_transformer_norm', 'mid_resblock_conv', 'mid_transformer_add_norm', 'mid_transformer_norm', 'transformer_add_norm', 'transformer_norm', 'up_resblock_conv', 'up_transformer_add_norm', 'up_transformer_norm']), +25-08-28 07:01:27 | I | calib_range=SkipBasedDynamicRangeCalibConfig( +25-08-28 07:01:27 | I | degree=2, +25-08-28 07:01:27 | I | objective=SearchBasedCalibObjective.OutputsError, +25-08-28 07:01:27 | I | strategy=SearchBasedCalibStrategy.Manual, +25-08-28 07:01:27 | I | granularity=SearchBasedCalibGranularity.Layer, +25-08-28 07:01:27 | I | element_batch_size=-1, +25-08-28 07:01:27 | I | sample_batch_size=16, +25-08-28 07:01:27 | I | element_size=-1, +25-08-28 07:01:27 | I | sample_size=-1, +25-08-28 07:01:27 | I | pre_reshape=True, +25-08-28 07:01:27 | I | outputs_device=cpu, +25-08-28 07:01:27 | I | ratio=1.0, +25-08-28 07:01:27 | I | max_shrink=0.2, +25-08-28 07:01:27 | I | max_expand=1.0, +25-08-28 07:01:27 | I | num_grids=80, +25-08-28 07:01:27 | I | allow_scale=False, +25-08-28 07:01:27 | I | skips=[]), +25-08-28 07:01:27 | I | skips=['down_resblock_shortcut', 'down_resblock_time_proj', 'down_sample', 'down_transformer_proj_in', 'down_transformer_proj_out', 'input_embed', 'mid_resblock_shortcut', 'mid_resblock_time_proj', 'mid_transformer_proj_in', 'mid_transformer_proj_out', 'output_embed', 'text_embed', 'time_embed', 'up_resblock_shortcut', 'up_resblock_time_proj', 'up_sample', 'up_transformer_proj_in', 'up_transformer_proj_out']), +25-08-28 07:01:27 | I | ipts=DiffusionActivationQuantizerConfig( +25-08-28 07:01:27 | I | dtype=sint4, +25-08-28 07:01:27 | I | zero_point=None, +25-08-28 07:01:27 | I | group_shapes=((1, 64, 1, 1, 1),), +25-08-28 07:01:27 | I | scale_dtypes=(None,), +25-08-28 07:01:27 | I | static=False, +25-08-28 07:01:27 | I | kernel_gptq=None, +25-08-28 07:01:27 | I | low_rank=None, +25-08-28 07:01:27 | I | calib_range=None, +25-08-28 07:01:27 | I | skips=['down_resblock_shortcut', 'down_resblock_time_proj', 'down_sample', 'down_transformer_add_norm', 'down_transformer_norm', 'down_transformer_proj_in', 'down_transformer_proj_out', 'input_embed', 'mid_resblock_shortcut', 'mid_resblock_time_proj', 'mid_transformer_add_norm', 'mid_transformer_norm', 'mid_transformer_proj_in', 'mid_transformer_proj_out', 'output_embed', 'text_embed', 'time_embed', 'transformer_add_norm', 'transformer_norm', 'up_resblock_shortcut', 'up_resblock_time_proj', 'up_sample', 'up_transformer_add_norm', 'up_transformer_norm', 'up_transformer_proj_in', 'up_transformer_proj_out'], +25-08-28 07:01:27 | I | allow_unsigned=True), +25-08-28 07:01:27 | I | opts=DiffusionActivationQuantizerConfig( +25-08-28 07:01:27 | I | dtype=None, +25-08-28 07:01:27 | I | zero_point=None, +25-08-28 07:01:27 | I | group_shapes=((-1, -1, -1),), +25-08-28 07:01:27 | I | scale_dtypes=(None,), +25-08-28 07:01:27 | I | static=False, +25-08-28 07:01:27 | I | kernel_gptq=None, +25-08-28 07:01:27 | I | low_rank=None, +25-08-28 07:01:27 | I | calib_range=None, +25-08-28 07:01:27 | I | skips=[], +25-08-28 07:01:27 | I | allow_unsigned=False), +25-08-28 07:01:27 | I | extra_wgts=None, +25-08-28 07:01:27 | I | unsigned_ipts=DiffusionActivationQuantizerConfig( +25-08-28 07:01:27 | I | dtype=uint4, +25-08-28 07:01:27 | I | zero_point=None, +25-08-28 07:01:27 | I | group_shapes=((1, 64, 1, 1, 1),), +25-08-28 07:01:27 | I | scale_dtypes=(None,), +25-08-28 07:01:27 | I | static=False, +25-08-28 07:01:27 | I | kernel_gptq=None, +25-08-28 07:01:27 | I | low_rank=None, +25-08-28 07:01:27 | I | calib_range=None, +25-08-28 07:01:27 | I | skips=['down_resblock_shortcut', 'down_resblock_time_proj', 'down_sample', 'down_transformer_add_norm', 'down_transformer_norm', 'down_transformer_proj_in', 'down_transformer_proj_out', 'input_embed', 'mid_resblock_shortcut', 'mid_resblock_time_proj', 'mid_transformer_add_norm', 'mid_transformer_norm', 'mid_transformer_proj_in', 'mid_transformer_proj_out', 'output_embed', 'text_embed', 'time_embed', 'transformer_add_norm', 'transformer_norm', 'up_resblock_shortcut', 'up_resblock_time_proj', 'up_sample', 'up_transformer_add_norm', 'up_transformer_norm', 'up_transformer_proj_in', 'up_transformer_proj_out'], +25-08-28 07:01:27 | I | allow_unsigned=True), +25-08-28 07:01:27 | I | calib=DiffusionCalibCacheLoaderConfig( +25-08-28 07:01:27 | I | data=qdiff, +25-08-28 07:01:27 | I | num_samples=128, +25-08-28 07:01:27 | I | batch_size=16, +25-08-28 07:01:27 | I | path=/data/pingzhi/deepcompressor/examples/diffusion/datasets/torch.bfloat16/flux.1-dev-ghibli/fmeuler50-g3.5/qdiff/s128, +25-08-28 07:01:27 | I | num_workers=8), +25-08-28 07:01:27 | I | rotation=None, +25-08-28 07:01:27 | I | smooth=SmoothTransfomerConfig( +25-08-28 07:01:27 | I | proj=SkipBasedSmoothCalibConfig( +25-08-28 07:01:27 | I | degree=2, +25-08-28 07:01:27 | I | objective=SearchBasedCalibObjective.OutputsError, +25-08-28 07:01:27 | I | strategy=SearchBasedCalibStrategy.GridSearch, +25-08-28 07:01:27 | I | granularity=SearchBasedCalibGranularity.Layer, +25-08-28 07:01:27 | I | element_batch_size=-1, +25-08-28 07:01:27 | I | sample_batch_size=16, +25-08-28 07:01:27 | I | element_size=-1, +25-08-28 07:01:27 | I | sample_size=-1, +25-08-28 07:01:27 | I | pre_reshape=True, +25-08-28 07:01:27 | I | outputs_device=cpu, +25-08-28 07:01:27 | I | fuse_when_possible=False, +25-08-28 07:01:27 | I | allow_a_quant=True, +25-08-28 07:01:27 | I | allow_b_quant=True, +25-08-28 07:01:27 | I | spans=[(, )], +25-08-28 07:01:27 | I | a_spans=[], +25-08-28 07:01:27 | I | b_spans=[], +25-08-28 07:01:27 | I | alpha=0.5, +25-08-28 07:01:27 | I | beta=-2, +25-08-28 07:01:27 | I | num_grids=20, +25-08-28 07:01:27 | I | allow_low_rank=True, +25-08-28 07:01:27 | I | skips=['down_resblock_conv', 'down_transformer_add_norm', 'down_transformer_norm', 'mid_resblock_conv', 'mid_transformer_add_norm', 'mid_transformer_norm', 'transformer_add_norm', 'transformer_norm', 'up_resblock_conv', 'up_transformer_add_norm', 'up_transformer_norm']), +25-08-28 07:01:27 | I | attn=None), +25-08-28 07:01:27 | I | develop_dtype=torch.float32), +25-08-28 07:01:27 | I | text=None, +25-08-28 07:01:27 | I | text_cache=LlmCacheConfig( +25-08-28 07:01:27 | I | root=, +25-08-28 07:01:27 | I | dirpath=LlmQuantCacheConfig( +25-08-28 07:01:27 | I | rotation=, +25-08-28 07:01:27 | I | reorder=, +25-08-28 07:01:27 | I | smooth=, +25-08-28 07:01:27 | I | wgts=, +25-08-28 07:01:27 | I | acts=), +25-08-28 07:01:27 | I | path=LlmQuantCacheConfig( +25-08-28 07:01:27 | I | rotation=, +25-08-28 07:01:27 | I | reorder=, +25-08-28 07:01:27 | I | smooth=, +25-08-28 07:01:27 | I | wgts=, +25-08-28 07:01:27 | I | acts=)), +25-08-28 07:01:27 | I | seed=12345, +25-08-28 07:01:27 | I | skip_gen=True, +25-08-28 07:01:27 | I | skip_eval=True, +25-08-28 07:01:27 | I | load_from=, +25-08-28 07:01:27 | I | save_model=true, +25-08-28 07:01:27 | I | copy_on_save=False) +25-08-28 07:01:27 | I | === Dumped Configurations === +25-08-28 07:01:27 | I | { 'cache': {'root': 'runs'}, +25-08-28 07:01:27 | I | 'copy_on_save': False, +25-08-28 07:01:27 | I | 'enable_cache': True, +25-08-28 07:01:27 | I | 'enable_text': False, +25-08-28 07:01:27 | I | 'eval': { 'batch_size': 1, +25-08-28 07:01:27 | I | 'batch_size_per_gpu': 1, +25-08-28 07:01:27 | I | 'benchmarks': ['MJHQ', 'DCI'], +25-08-28 07:01:27 | I | 'chunk_only': False, +25-08-28 07:01:27 | I | 'chunk_start': 0, +25-08-28 07:01:27 | I | 'chunk_step': 1, +25-08-28 07:01:27 | I | 'clean_caption': None, +25-08-28 07:01:27 | I | 'control_root': 'benchmarks', +25-08-28 07:01:27 | I | 'gen_root': '{output}/{job}', +25-08-28 07:01:27 | I | 'gt_metrics': ['clip_iqa', 'clip_score', 'image_reward', 'fid'], +25-08-28 07:01:27 | I | 'gt_stats_root': 'benchmarks/stats', +25-08-28 07:01:27 | I | 'guidance_scale': 3.5, +25-08-28 07:01:27 | I | 'height': None, +25-08-28 07:01:27 | I | 'num_gpus': 1, +25-08-28 07:01:27 | I | 'num_samples': 5000, +25-08-28 07:01:27 | I | 'num_steps': 50, +25-08-28 07:01:27 | I | 'protocol': 'fmeuler50-g3.5', +25-08-28 07:01:27 | I | 'ref_metrics': ['psnr', 'lpips', 'ssim', 'fid'], +25-08-28 07:01:27 | I | 'ref_root': '/data/pingzhi/deepcompressor/examples/diffusion/baselines/torch.bfloat16/flux.1-dev-ghibli/fmeuler50-g3.5', +25-08-28 07:01:27 | I | 'width': None}, +25-08-28 07:01:27 | I | 'load_from': '', +25-08-28 07:01:27 | I | 'output': { 'dirname': 'shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000', +25-08-28 07:01:27 | I | 'job': 'run', +25-08-28 07:01:27 | I | 'root': 'runs'}, +25-08-28 07:01:27 | I | 'pipeline': { 'device': 'cuda', +25-08-28 07:01:27 | I | 'dtype': 'torch.bfloat16', +25-08-28 07:01:27 | I | 'enable_lora': False, +25-08-28 07:01:27 | I | 'name': 'flux.1-dev-ghibli', +25-08-28 07:01:27 | I | 'path': '/home/pingzhi/checkpoints/fused_flux_dev_studio_ghibli_bf16', +25-08-28 07:01:27 | I | 'shift_activations': True, +25-08-28 07:01:27 | I | 'task': 'text-to-image'}, +25-08-28 07:01:27 | I | 'quant': { 'calib': { 'batch_size': 16, +25-08-28 07:01:27 | I | 'data': 'qdiff', +25-08-28 07:01:27 | I | 'num_samples': 128, +25-08-28 07:01:27 | I | 'num_workers': 8, +25-08-28 07:01:27 | I | 'path': '/data/pingzhi/deepcompressor/examples/diffusion/datasets/torch.bfloat16/flux.1-dev-ghibli/fmeuler50-g3.5/qdiff/s128'}, +25-08-28 07:01:27 | I | 'develop_dtype': 'torch.float32', +25-08-28 07:01:27 | I | 'enable_extra_wgts': False, +25-08-28 07:01:27 | I | 'enable_rotation': False, +25-08-28 07:01:27 | I | 'enable_smooth': True, +25-08-28 07:01:27 | I | 'ipts': { 'allow_unsigned': True, +25-08-28 07:01:27 | I | 'dtype': 'sint4', +25-08-28 07:01:27 | I | 'enable_calib_range': False, +25-08-28 07:01:27 | I | 'group_shapes': [[1, 64, 1, 1, 1]], +25-08-28 07:01:27 | I | 'scale_dtypes': [None], +25-08-28 07:01:27 | I | 'skips': [ 'down_resblock_shortcut', +25-08-28 07:01:27 | I | 'down_resblock_time_proj', +25-08-28 07:01:27 | I | 'down_sample', +25-08-28 07:01:27 | I | 'down_transformer_add_norm', +25-08-28 07:01:27 | I | 'down_transformer_norm', +25-08-28 07:01:27 | I | 'down_transformer_proj_in', +25-08-28 07:01:27 | I | 'down_transformer_proj_out', +25-08-28 07:01:27 | I | 'input_embed', +25-08-28 07:01:27 | I | 'mid_resblock_shortcut', +25-08-28 07:01:27 | I | 'mid_resblock_time_proj', +25-08-28 07:01:27 | I | 'mid_transformer_add_norm', +25-08-28 07:01:27 | I | 'mid_transformer_norm', +25-08-28 07:01:27 | I | 'mid_transformer_proj_in', +25-08-28 07:01:27 | I | 'mid_transformer_proj_out', +25-08-28 07:01:27 | I | 'output_embed', +25-08-28 07:01:27 | I | 'text_embed', +25-08-28 07:01:27 | I | 'time_embed', +25-08-28 07:01:27 | I | 'transformer_add_norm', +25-08-28 07:01:27 | I | 'transformer_norm', +25-08-28 07:01:27 | I | 'up_resblock_shortcut', +25-08-28 07:01:27 | I | 'up_resblock_time_proj', +25-08-28 07:01:27 | I | 'up_sample', +25-08-28 07:01:27 | I | 'up_transformer_add_norm', +25-08-28 07:01:27 | I | 'up_transformer_norm', +25-08-28 07:01:27 | I | 'up_transformer_proj_in', +25-08-28 07:01:27 | I | 'up_transformer_proj_out'], +25-08-28 07:01:27 | I | 'static': False, +25-08-28 07:01:27 | I | 'zero_point': None}, +25-08-28 07:01:27 | I | 'opts': { 'allow_unsigned': False, +25-08-28 07:01:27 | I | 'dtype': None, +25-08-28 07:01:27 | I | 'enable_calib_range': False, +25-08-28 07:01:27 | I | 'group_shapes': [[-1, -1, -1]], +25-08-28 07:01:27 | I | 'scale_dtypes': [None], +25-08-28 07:01:27 | I | 'skips': [], +25-08-28 07:01:27 | I | 'static': False, +25-08-28 07:01:27 | I | 'zero_point': None}, +25-08-28 07:01:27 | I | 'smooth': { 'enable_attn': False, +25-08-28 07:01:27 | I | 'enable_proj': True, +25-08-28 07:01:27 | I | 'proj': { 'allow_a_quant': True, +25-08-28 07:01:27 | I | 'allow_b_quant': True, +25-08-28 07:01:27 | I | 'allow_low_rank': True, +25-08-28 07:01:27 | I | 'alpha': 0.5, +25-08-28 07:01:27 | I | 'beta': -2, +25-08-28 07:01:27 | I | 'degree': 2, +25-08-28 07:01:27 | I | 'element_batch_size': -1, +25-08-28 07:01:27 | I | 'element_size': -1, +25-08-28 07:01:27 | I | 'fuse_when_possible': False, +25-08-28 07:01:27 | I | 'granularity': 'Layer', +25-08-28 07:01:27 | I | 'num_grids': 20, +25-08-28 07:01:27 | I | 'objective': 'OutputsError', +25-08-28 07:01:27 | I | 'outputs_device': 'cpu', +25-08-28 07:01:27 | I | 'pre_reshape': True, +25-08-28 07:01:27 | I | 'sample_batch_size': 16, +25-08-28 07:01:27 | I | 'sample_size': -1, +25-08-28 07:01:27 | I | 'skips': [ 'down_resblock_conv', +25-08-28 07:01:27 | I | 'down_transformer_add_norm', +25-08-28 07:01:27 | I | 'down_transformer_norm', +25-08-28 07:01:27 | I | 'mid_resblock_conv', +25-08-28 07:01:27 | I | 'mid_transformer_add_norm', +25-08-28 07:01:27 | I | 'mid_transformer_norm', +25-08-28 07:01:27 | I | 'transformer_add_norm', +25-08-28 07:01:27 | I | 'transformer_norm', +25-08-28 07:01:27 | I | 'up_resblock_conv', +25-08-28 07:01:27 | I | 'up_transformer_add_norm', +25-08-28 07:01:27 | I | 'up_transformer_norm'], +25-08-28 07:01:27 | I | 'spans': [['AbsMax', 'AbsMax']], +25-08-28 07:01:27 | I | 'strategy': 'GridSearch'}}, +25-08-28 07:01:27 | I | 'wgts': { 'calib_range': { 'allow_scale': False, +25-08-28 07:01:27 | I | 'degree': 2, +25-08-28 07:01:27 | I | 'element_batch_size': -1, +25-08-28 07:01:27 | I | 'element_size': -1, +25-08-28 07:01:27 | I | 'granularity': 'Layer', +25-08-28 07:01:27 | I | 'max_expand': 1.0, +25-08-28 07:01:27 | I | 'max_shrink': 0.2, +25-08-28 07:01:27 | I | 'num_grids': 80, +25-08-28 07:01:27 | I | 'objective': 'OutputsError', +25-08-28 07:01:27 | I | 'outputs_device': 'cpu', +25-08-28 07:01:27 | I | 'pre_reshape': True, +25-08-28 07:01:27 | I | 'ratio': 1.0, +25-08-28 07:01:27 | I | 'sample_batch_size': 16, +25-08-28 07:01:27 | I | 'sample_size': -1, +25-08-28 07:01:27 | I | 'skips': [], +25-08-28 07:01:27 | I | 'strategy': 'Manual'}, +25-08-28 07:01:27 | I | 'dtype': 'sint4', +25-08-28 07:01:27 | I | 'enable_calib_range': True, +25-08-28 07:01:27 | I | 'enable_kernel_gptq': False, +25-08-28 07:01:27 | I | 'enable_low_rank': True, +25-08-28 07:01:27 | I | 'group_shapes': [[1, 64, 1, 1, 1]], +25-08-28 07:01:27 | I | 'low_rank': { 'compensate': False, +25-08-28 07:01:27 | I | 'degree': 2, +25-08-28 07:01:27 | I | 'early_stop': True, +25-08-28 07:01:27 | I | 'exclusive': False, +25-08-28 07:01:27 | I | 'num_iters': 100, +25-08-28 07:01:27 | I | 'objective': 'OutputsError', +25-08-28 07:01:27 | I | 'outputs_device': 'cpu', +25-08-28 07:01:27 | I | 'rank': 32, +25-08-28 07:01:27 | I | 'sample_batch_size': 16, +25-08-28 07:01:27 | I | 'sample_size': -1, +25-08-28 07:01:27 | I | 'skips': [ 'down_resblock_conv', +25-08-28 07:01:27 | I | 'down_transformer_add_norm', +25-08-28 07:01:27 | I | 'down_transformer_norm', +25-08-28 07:01:27 | I | 'mid_resblock_conv', +25-08-28 07:01:27 | I | 'mid_transformer_add_norm', +25-08-28 07:01:27 | I | 'mid_transformer_norm', +25-08-28 07:01:27 | I | 'transformer_add_norm', +25-08-28 07:01:27 | I | 'transformer_norm', +25-08-28 07:01:27 | I | 'up_resblock_conv', +25-08-28 07:01:27 | I | 'up_transformer_add_norm', +25-08-28 07:01:27 | I | 'up_transformer_norm'], +25-08-28 07:01:27 | I | 'strategy': 'Manual'}, +25-08-28 07:01:27 | I | 'scale_dtypes': [None], +25-08-28 07:01:27 | I | 'skips': [ 'down_resblock_shortcut', +25-08-28 07:01:27 | I | 'down_resblock_time_proj', +25-08-28 07:01:27 | I | 'down_sample', +25-08-28 07:01:27 | I | 'down_transformer_proj_in', +25-08-28 07:01:27 | I | 'down_transformer_proj_out', +25-08-28 07:01:27 | I | 'input_embed', +25-08-28 07:01:27 | I | 'mid_resblock_shortcut', +25-08-28 07:01:27 | I | 'mid_resblock_time_proj', +25-08-28 07:01:27 | I | 'mid_transformer_proj_in', +25-08-28 07:01:27 | I | 'mid_transformer_proj_out', +25-08-28 07:01:27 | I | 'output_embed', +25-08-28 07:01:27 | I | 'text_embed', +25-08-28 07:01:27 | I | 'time_embed', +25-08-28 07:01:27 | I | 'up_resblock_shortcut', +25-08-28 07:01:27 | I | 'up_resblock_time_proj', +25-08-28 07:01:27 | I | 'up_sample', +25-08-28 07:01:27 | I | 'up_transformer_proj_in', +25-08-28 07:01:27 | I | 'up_transformer_proj_out'], +25-08-28 07:01:27 | I | 'zero_point': None}}, +25-08-28 07:01:27 | I | 'save_model': 'true', +25-08-28 07:01:27 | I | 'seed': 12345, +25-08-28 07:01:27 | I | 'skip_eval': True, +25-08-28 07:01:27 | I | 'skip_gen': True, +25-08-28 07:01:27 | I | 'text_cache': {'path': {'acts': '', 'reorder': '', 'rotation': '', 'smooth': '', 'wgts': ''}, 'root': ''}} +25-08-28 07:01:27 | I | === Output Directory === +25-08-28 07:01:27 | I | runs/diffusion/flux.1/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000/run-250828.070127 +25-08-28 07:01:27 | I | === Start Evaluating === +25-08-28 07:01:27 | I | * Building diffusion model pipeline +25-08-28 07:01:37 | I | Replacing fused Linear with ConcatLinear. +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.0 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.1 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.2 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.3 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.4 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.5 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.6 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.7 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.8 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.9 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.10 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.11 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.12 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.13 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.14 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.15 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.16 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.17 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.18 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.19 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.20 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.21 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.22 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.23 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.24 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.25 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.26 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.27 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.28 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.29 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.30 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.31 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.32 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.33 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.34 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.35 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.36 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | + Replacing fused Linear in single_transformer_blocks.37 with ConcatLinear. +25-08-28 07:01:37 | I | - in_features = 3072/15360 +25-08-28 07:01:37 | I | - out_features = 3072 +25-08-28 07:01:37 | I | - Shifting input activations. +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.0.ff.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.0.ff.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.0.ff_context.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.0.ff_context.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.1.ff.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.1.ff.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.1.ff_context.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.1.ff_context.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.2.ff.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.2.ff.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.2.ff_context.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.2.ff_context.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.3.ff.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.3.ff.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.3.ff_context.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.3.ff_context.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.4.ff.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.4.ff.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.4.ff_context.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.4.ff_context.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.5.ff.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.5.ff.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.5.ff_context.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.5.ff_context.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.6.ff.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.6.ff.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.6.ff_context.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.6.ff_context.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.7.ff.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.7.ff.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.7.ff_context.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.7.ff_context.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.8.ff.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.8.ff.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.8.ff_context.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.8.ff_context.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.9.ff.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.9.ff.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.9.ff_context.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.9.ff_context.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.10.ff.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.10.ff.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.10.ff_context.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.10.ff_context.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.11.ff.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.11.ff.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.11.ff_context.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.11.ff_context.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.12.ff.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.12.ff.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.12.ff_context.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.12.ff_context.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.13.ff.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.13.ff.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.13.ff_context.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.13.ff_context.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.14.ff.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.14.ff.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.14.ff_context.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.14.ff_context.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.15.ff.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.15.ff.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.15.ff_context.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.15.ff_context.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.16.ff.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.16.ff.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.16.ff_context.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.16.ff_context.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.17.ff.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.17.ff.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.17.ff_context.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.17.ff_context.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.18.ff.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.18.ff.net +25-08-28 07:01:37 | I | + Shifting input activations of transformer_blocks.18.ff_context.net.2 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 2 in transformer_blocks.18.ff_context.net +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.0.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.0.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.1.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.1.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.2.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.2.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.3.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.3.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.4.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.4.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.5.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.5.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.6.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.6.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.7.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.7.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.8.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.8.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.9.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.9.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.10.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.10.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.11.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.11.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.12.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.12.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.13.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.13.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.14.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.14.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.15.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.15.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.16.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.16.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.17.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.17.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.18.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.18.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.19.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.19.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.20.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.20.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.21.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.21.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.22.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.22.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.23.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.23.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.24.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.24.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.25.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.25.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.26.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.26.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.27.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.27.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.28.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.28.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.29.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.29.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.30.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.30.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.31.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.31.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.32.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.32.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.33.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.33.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.34.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.34.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.35.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.35.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.36.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.36.proj_out.linears +25-08-28 07:01:37 | I | + Shifting input activations of single_transformer_blocks.37.proj_out.linears.1 by 0.171875 +25-08-28 07:01:37 | I | + Replacing 1 in single_transformer_blocks.37.proj_out.linears +25-08-28 07:01:37 | I | * Smoothing model for quantization +25-08-28 07:01:37 | I | - Generating smooth scales +25-08-28 07:02:38 | D | - Smoothing Diffusion Block transformer_blocks.0 +25-08-28 07:02:38 | D | - Skipping Module transformer_blocks.0.norm1.linear +25-08-28 07:02:38 | D | - Skipping Module transformer_blocks.0.norm1_context.linear +25-08-28 07:02:38 | D | - Smoothing Transformer Block transformer_blocks.0 +25-08-28 07:02:38 | D | - transformer_blocks.0.attn.qkv_proj +25-08-28 07:02:38 | D | + w: sint4 +25-08-28 07:02:38 | D | + x: sint4 +25-08-28 07:02:38 | D | + y: None +25-08-28 07:02:38 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:02:38 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:02:39 | D | + x - AbsMax +25-08-28 07:02:39 | D | + x = [min=0.0962, max=17.8750] +25-08-28 07:02:39 | D | + w - AbsMax +25-08-28 07:02:39 | D | + w = [min=0.1064, max=2.4375] +25-08-28 07:02:39 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:02:41 | D | + finished calculating the original outputs, ram usage: 19.3 +25-08-28 07:04:02 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:04:02 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:04:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:04:02 | D | - sum error = [ 1064.3799, 996.7048, 887.4095, 819.9966, 739.5454] +25-08-28 07:04:02 | D | - best error = [ 1064.3799, 996.7048, 887.4095, 819.9966, 739.5454] +25-08-28 07:04:02 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:04:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:04:02 | D | - sum error = [ 631.5131, 588.7467, 543.8616, 527.3550, 517.7455] +25-08-28 07:04:02 | D | - best error = [ 631.5131, 588.7467, 543.8616, 527.3550, 517.7455] +25-08-28 07:04:02 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:04:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:04:02 | D | - sum error = [ 511.2745, 511.3791, 535.6498, 570.5857, 667.6604] +25-08-28 07:04:02 | D | - best error = [ 511.2745, 511.2745, 511.2745, 511.2745, 511.2745] +25-08-28 07:04:02 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:04:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:04:02 | D | - sum error = [ 732.6610, 814.5998, 934.6103, 1059.9018, 1223.7191] +25-08-28 07:04:02 | D | - best error = [ 511.2745, 511.2745, 511.2745, 511.2745, 511.2745] +25-08-28 07:04:02 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:04:02 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:04:02 | D | - sum error = [13175.5094, 12696.5085, 10200.5561, 7716.8390, 5296.0429] +25-08-28 07:04:02 | D | - best error = [ 511.2745, 511.2745, 511.2745, 511.2745, 511.2745] +25-08-28 07:04:02 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:04:02 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:04:02 | D | - sum error = [ 3560.8305, 2587.8249, 1953.3106, 1623.9587, 1394.6759] +25-08-28 07:04:02 | D | - best error = [ 511.2745, 511.2745, 511.2745, 511.2745, 511.2745] +25-08-28 07:04:02 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:04:02 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:04:02 | D | - sum error = [ 1322.6286, 1197.7395, 1095.9783, 989.1989, 998.1579] +25-08-28 07:04:02 | D | - best error = [ 511.2745, 511.2745, 511.2745, 511.2745, 511.2745] +25-08-28 07:04:02 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:04:02 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:04:02 | D | - sum error = [ 961.2811, 1022.5864, 1092.7644, 1228.3923] +25-08-28 07:04:02 | D | - best error = [ 511.2745, 511.2745, 511.2745, 511.2745] +25-08-28 07:04:02 | D | + error = 511.2745 +25-08-28 07:04:02 | D | + scale = [min=0.3101, max=4.2279] +25-08-28 07:04:02 | D | - transformer_blocks.0.attn add_qkv_proj +25-08-28 07:04:02 | D | + w: sint4 +25-08-28 07:04:02 | D | + x: sint4 +25-08-28 07:04:02 | D | + y: None +25-08-28 07:04:02 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:04:02 | D | + finished parsing calibration arguments, ram usage: 19.3 +25-08-28 07:04:02 | D | + x - AbsMax +25-08-28 07:04:02 | D | + x = [min=0.1777, max=18.3750] +25-08-28 07:04:02 | D | + w - AbsMax +25-08-28 07:04:02 | D | + w = [min=0.1182, max=0.5391] +25-08-28 07:04:02 | D | + finished resetting calibrator, ram usage: 19.3 +25-08-28 07:04:03 | D | + finished calculating the original outputs, ram usage: 19.4 +25-08-28 07:05:11 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:05:11 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:05:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:05:11 | D | - sum error = [ 745.1206, 713.6668, 702.3928, 668.1295, 643.3507] +25-08-28 07:05:11 | D | - best error = [ 745.1206, 713.6668, 702.3928, 668.1295, 643.3507] +25-08-28 07:05:11 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:05:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:05:11 | D | - sum error = [ 629.7263, 597.1207, 580.5638, 562.2960, 554.3014] +25-08-28 07:05:11 | D | - best error = [ 629.7263, 597.1207, 580.5638, 562.2960, 554.3014] +25-08-28 07:05:11 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:05:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:05:11 | D | - sum error = [ 539.4201, 523.1196, 514.9002, 515.3600, 516.8578] +25-08-28 07:05:11 | D | - best error = [ 539.4201, 523.1196, 514.9002, 514.9002, 514.9002] +25-08-28 07:05:11 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:05:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:05:11 | D | - sum error = [ 517.9014, 532.3081, 527.2281, 555.9314, 558.3527] +25-08-28 07:05:11 | D | - best error = [ 514.9002, 514.9002, 514.9002, 514.9002, 514.9002] +25-08-28 07:05:11 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:05:11 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:05:11 | D | - sum error = [ 1144.9845, 1047.7609, 1013.9370, 955.2202, 888.8192] +25-08-28 07:05:11 | D | - best error = [ 514.9002, 514.9002, 514.9002, 514.9002, 514.9002] +25-08-28 07:05:11 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:05:11 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:05:11 | D | - sum error = [ 824.3203, 787.2870, 737.3928, 703.9981, 666.1844] +25-08-28 07:05:11 | D | - best error = [ 514.9002, 514.9002, 514.9002, 514.9002, 514.9002] +25-08-28 07:05:11 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:05:11 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:05:11 | D | - sum error = [ 630.3960, 590.3280, 568.8704, 555.1370, 551.3687] +25-08-28 07:05:11 | D | - best error = [ 514.9002, 514.9002, 514.9002, 514.9002, 514.9002] +25-08-28 07:05:11 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:05:11 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:05:11 | D | - sum error = [ 551.8323, 543.7482, 553.1228, 557.5297] +25-08-28 07:05:11 | D | - best error = [ 514.9002, 514.9002, 514.9002, 514.9002] +25-08-28 07:05:11 | D | + error = 514.9002 +25-08-28 07:05:11 | D | + scale = [min=0.3547, max=5.7350] +25-08-28 07:05:12 | D | - transformer_blocks.0.attn.out_proj + transformer_blocks.0.attn.add_out_proj +25-08-28 07:05:12 | D | + w: sint4 +25-08-28 07:05:12 | D | + x: sint4 +25-08-28 07:05:12 | D | + y: None +25-08-28 07:05:12 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:05:12 | D | + finished parsing calibration arguments, ram usage: 19.4 +25-08-28 07:05:12 | D | + x - AbsMax +25-08-28 07:05:12 | D | + x = [min=0.0562, max=1.6016] +25-08-28 07:05:12 | D | + w - AbsMax +25-08-28 07:05:12 | D | + w = [min=0.0986, max=0.4082] +25-08-28 07:05:12 | D | + finished resetting calibrator, ram usage: 19.4 +25-08-28 07:05:13 | D | + finished calculating the original outputs, ram usage: 19.4 +25-08-28 07:07:04 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:07:04 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:07:04 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:07:04 | D | - sum error = [ 1776.1978, 1662.7175, 1554.7768, 1472.5910, 1398.9409] +25-08-28 07:07:04 | D | - best error = [ 1776.1978, 1662.7175, 1554.7768, 1472.5910, 1398.9409] +25-08-28 07:07:04 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:07:04 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:07:04 | D | - sum error = [ 1341.5569, 1291.8129, 1264.0009, 1250.5701, 1240.0725] +25-08-28 07:07:04 | D | - best error = [ 1341.5569, 1291.8129, 1264.0009, 1250.5701, 1240.0725] +25-08-28 07:07:04 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:07:04 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:07:04 | D | - sum error = [ 1237.2786, 1251.0652, 1277.0262, 1324.9190, 1362.2413] +25-08-28 07:07:04 | D | - best error = [ 1237.2786, 1237.2786, 1237.2786, 1237.2786, 1237.2786] +25-08-28 07:07:04 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:07:04 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:07:04 | D | - sum error = [ 1413.9634, 1486.4488, 1571.3691, 1663.5900, 1772.4437] +25-08-28 07:07:04 | D | - best error = [ 1237.2786, 1237.2786, 1237.2786, 1237.2786, 1237.2786] +25-08-28 07:07:04 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:07:04 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:07:04 | D | - sum error = [ 2208.4986, 2037.1343, 1880.8726, 1740.1937, 1623.8356] +25-08-28 07:07:04 | D | - best error = [ 1237.2786, 1237.2786, 1237.2786, 1237.2786, 1237.2786] +25-08-28 07:07:04 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:07:04 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:07:04 | D | - sum error = [ 1520.9510, 1425.2618, 1366.6405, 1318.1465, 1289.4992] +25-08-28 07:07:04 | D | - best error = [ 1237.2786, 1237.2786, 1237.2786, 1237.2786, 1237.2786] +25-08-28 07:07:04 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:07:04 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:07:04 | D | - sum error = [ 1284.5442, 1279.9696, 1300.9585, 1335.0777, 1381.0093] +25-08-28 07:07:04 | D | - best error = [ 1237.2786, 1237.2786, 1237.2786, 1237.2786, 1237.2786] +25-08-28 07:07:04 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:07:04 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:07:04 | D | - sum error = [ 1450.1680, 1540.0380, 1635.3495, 1752.2166] +25-08-28 07:07:04 | D | - best error = [ 1237.2786, 1237.2786, 1237.2786, 1237.2786] +25-08-28 07:07:04 | D | + error = 1237.2786 +25-08-28 07:07:04 | D | + scale = [min=0.2370, max=1.2655] +25-08-28 07:07:04 | D | - transformer_blocks.0.ff.up_proj +25-08-28 07:07:04 | D | + w: sint4 +25-08-28 07:07:04 | D | + x: sint4 +25-08-28 07:07:04 | D | + y: None +25-08-28 07:07:04 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:07:04 | D | + finished parsing calibration arguments, ram usage: 17.5 +25-08-28 07:07:05 | D | + x - AbsMax +25-08-28 07:07:05 | D | + x = [min=0.0310, max=5.2500] +25-08-28 07:07:05 | D | + w - AbsMax +25-08-28 07:07:05 | D | + w = [min=0.0306, max=0.4395] +25-08-28 07:07:05 | D | + finished resetting calibrator, ram usage: 17.5 +25-08-28 07:07:11 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 07:08:30 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:08:30 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:08:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:08:30 | D | - sum error = [ 1653.5739, 1585.4121, 1520.5677, 1469.0677, 1419.5771] +25-08-28 07:08:30 | D | - best error = [ 1653.5739, 1585.4121, 1520.5677, 1469.0677, 1419.5771] +25-08-28 07:08:30 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:08:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:08:30 | D | - sum error = [ 1378.9609, 1340.2739, 1305.5133, 1279.4360, 1261.3263] +25-08-28 07:08:30 | D | - best error = [ 1378.9609, 1340.2739, 1305.5133, 1279.4360, 1261.3263] +25-08-28 07:08:30 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:08:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:08:30 | D | - sum error = [ 1246.0476, 1236.8989, 1232.9347, 1234.4105, 1237.3592] +25-08-28 07:08:30 | D | - best error = [ 1246.0476, 1236.8989, 1232.9347, 1232.9347, 1232.9347] +25-08-28 07:08:30 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:08:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:08:30 | D | - sum error = [ 1249.0574, 1268.4900, 1296.1285, 1333.4139, 1382.0038] +25-08-28 07:08:30 | D | - best error = [ 1232.9347, 1232.9347, 1232.9347, 1232.9347, 1232.9347] +25-08-28 07:08:30 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:08:30 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:08:30 | D | - sum error = [ 2272.6229, 2040.8891, 1865.1645, 1735.5599, 1622.8436] +25-08-28 07:08:30 | D | - best error = [ 1232.9347, 1232.9347, 1232.9347, 1232.9347, 1232.9347] +25-08-28 07:08:30 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:08:30 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:08:30 | D | - sum error = [ 1533.1724, 1458.0918, 1390.8464, 1337.0980, 1293.2597] +25-08-28 07:08:30 | D | - best error = [ 1232.9347, 1232.9347, 1232.9347, 1232.9347, 1232.9347] +25-08-28 07:08:30 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:08:30 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:08:30 | D | - sum error = [ 1261.8416, 1238.2865, 1222.3283, 1217.4853, 1221.3522] +25-08-28 07:08:30 | D | - best error = [ 1232.9347, 1232.9347, 1222.3283, 1217.4853, 1217.4853] +25-08-28 07:08:30 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:08:30 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:08:30 | D | - sum error = [ 1234.7249, 1262.7003, 1301.0848, 1360.8918] +25-08-28 07:08:30 | D | - best error = [ 1217.4853, 1217.4853, 1217.4853, 1217.4853] +25-08-28 07:08:30 | D | + error = 1217.4853 +25-08-28 07:08:30 | D | + scale = [min=0.2292, max=5.7264] +25-08-28 07:08:30 | D | - transformer_blocks.0.ff.down_proj +25-08-28 07:08:30 | D | + w: sint4 +25-08-28 07:08:30 | D | + x: uint4 +25-08-28 07:08:30 | D | + y: None +25-08-28 07:08:30 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:08:30 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 07:08:35 | D | + x - AbsMax +25-08-28 07:08:35 | D | + x = [min=0.2656, max=7.4375] +25-08-28 07:08:35 | D | + w - AbsMax +25-08-28 07:08:35 | D | + w = [min=0.0571, max=0.6055] +25-08-28 07:08:35 | D | + finished resetting calibrator, ram usage: 19.1 +25-08-28 07:08:37 | D | + finished calculating the original outputs, ram usage: 19.1 +25-08-28 07:10:34 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:10:34 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:10:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:10:34 | D | - sum error = [ 2093.2856, 2083.1135, 2089.8939, 2083.8023, 2093.2730] +25-08-28 07:10:34 | D | - best error = [ 2093.2856, 2083.1135, 2083.1135, 2083.1135, 2083.1135] +25-08-28 07:10:34 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:10:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:10:34 | D | - sum error = [ 2181.8721, 2210.6770, 2237.7438, 2336.2245, 2409.2511] +25-08-28 07:10:34 | D | - best error = [ 2083.1135, 2083.1135, 2083.1135, 2083.1135, 2083.1135] +25-08-28 07:10:34 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:10:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:10:34 | D | - sum error = [ 2535.1686, 2651.1920, 2792.7794, 3001.0661, 3197.4230] +25-08-28 07:10:34 | D | - best error = [ 2083.1135, 2083.1135, 2083.1135, 2083.1135, 2083.1135] +25-08-28 07:10:34 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:10:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:10:34 | D | - sum error = [ 3427.3482, 3701.9849, 4096.4150, 4481.9824, 4921.0617] +25-08-28 07:10:34 | D | - best error = [ 2083.1135, 2083.1135, 2083.1135, 2083.1135, 2083.1135] +25-08-28 07:10:34 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:10:34 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:10:34 | D | - sum error = [ 2588.4243, 2402.2642, 2270.7685, 2243.8873, 2194.4302] +25-08-28 07:10:34 | D | - best error = [ 2083.1135, 2083.1135, 2083.1135, 2083.1135, 2083.1135] +25-08-28 07:10:34 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:10:34 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:10:34 | D | - sum error = [ 2163.2132, 2149.4870, 2206.3957, 2246.1952, 2355.3809] +25-08-28 07:10:34 | D | - best error = [ 2083.1135, 2083.1135, 2083.1135, 2083.1135, 2083.1135] +25-08-28 07:10:34 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:10:34 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:10:34 | D | - sum error = [ 2497.8596, 2565.4549, 2702.8169, 2808.9049, 3097.1345] +25-08-28 07:10:34 | D | - best error = [ 2083.1135, 2083.1135, 2083.1135, 2083.1135, 2083.1135] +25-08-28 07:10:34 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:10:34 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:10:34 | D | - sum error = [ 3369.8157, 3731.4281, 4210.2375, 4778.9493] +25-08-28 07:10:34 | D | - best error = [ 2083.1135, 2083.1135, 2083.1135, 2083.1135] +25-08-28 07:10:34 | D | + error = 2083.1135 +25-08-28 07:10:34 | D | + scale = [min=0.9359, max=1.1055] +25-08-28 07:10:34 | D | - transformer_blocks.0.ff_context.up_proj +25-08-28 07:10:34 | D | + w: sint4 +25-08-28 07:10:34 | D | + x: sint4 +25-08-28 07:10:34 | D | + y: None +25-08-28 07:10:34 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:10:34 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 07:10:34 | D | + x - AbsMax +25-08-28 07:10:34 | D | + x = [min=0.1562, max=11.8750] +25-08-28 07:10:34 | D | + w - AbsMax +25-08-28 07:10:34 | D | + w = [min=0.1162, max=0.4180] +25-08-28 07:10:34 | D | + finished resetting calibrator, ram usage: 19.1 +25-08-28 07:10:36 | D | + finished calculating the original outputs, ram usage: 19.3 +25-08-28 07:11:20 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:11:20 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:11:20 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:11:20 | D | - sum error = [ 5536.0190, 5360.7143, 5199.6325, 5056.1683, 4924.2092] +25-08-28 07:11:20 | D | - best error = [ 5536.0190, 5360.7143, 5199.6325, 5056.1683, 4924.2092] +25-08-28 07:11:20 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:11:20 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:11:20 | D | - sum error = [ 4782.0788, 4658.1870, 4569.8987, 4482.6680, 4402.4552] +25-08-28 07:11:20 | D | - best error = [ 4782.0788, 4658.1870, 4569.8987, 4482.6680, 4402.4552] +25-08-28 07:11:20 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:11:20 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:11:20 | D | - sum error = [ 4358.3114, 4307.0584, 4284.4194, 4265.9883, 4289.6531] +25-08-28 07:11:20 | D | - best error = [ 4358.3114, 4307.0584, 4284.4194, 4265.9883, 4265.9883] +25-08-28 07:11:20 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:11:20 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:11:20 | D | - sum error = [ 4303.7556, 4350.0497, 4387.1430, 4488.1364, 4567.2823] +25-08-28 07:11:20 | D | - best error = [ 4265.9883, 4265.9883, 4265.9883, 4265.9883, 4265.9883] +25-08-28 07:11:20 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:11:20 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:11:20 | D | - sum error = [ 5642.3600, 5432.9419, 5268.7300, 5096.4709, 4958.4014] +25-08-28 07:11:20 | D | - best error = [ 4265.9883, 4265.9883, 4265.9883, 4265.9883, 4265.9883] +25-08-28 07:11:20 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:11:20 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:11:20 | D | - sum error = [ 4797.5660, 4644.1507, 4544.8133, 4477.2273, 4385.0439] +25-08-28 07:11:20 | D | - best error = [ 4265.9883, 4265.9883, 4265.9883, 4265.9883, 4265.9883] +25-08-28 07:11:20 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:11:20 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:11:20 | D | - sum error = [ 4320.0616, 4300.6958, 4279.2249, 4273.8830, 4294.0489] +25-08-28 07:11:20 | D | - best error = [ 4265.9883, 4265.9883, 4265.9883, 4265.9883, 4265.9883] +25-08-28 07:11:20 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:11:20 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:11:20 | D | - sum error = [ 4320.1391, 4374.5425, 4462.0412, 4575.0460] +25-08-28 07:11:20 | D | - best error = [ 4265.9883, 4265.9883, 4265.9883, 4265.9883] +25-08-28 07:11:20 | D | + error = 4265.9883 +25-08-28 07:11:20 | D | + scale = [min=0.2992, max=4.9947] +25-08-28 07:11:20 | D | - transformer_blocks.0.ff_context.down_proj +25-08-28 07:11:20 | D | + w: sint4 +25-08-28 07:11:20 | D | + x: uint4 +25-08-28 07:11:20 | D | + y: None +25-08-28 07:11:20 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:11:20 | D | + finished parsing calibration arguments, ram usage: 19.3 +25-08-28 07:11:21 | D | + x - AbsMax +25-08-28 07:11:21 | D | + x = [min=0.1719, max=50.5000] +25-08-28 07:11:21 | D | + w - AbsMax +25-08-28 07:11:21 | D | + w = [min=0.0723, max=0.5039] +25-08-28 07:11:21 | D | + finished resetting calibrator, ram usage: 19.3 +25-08-28 07:11:21 | D | + finished calculating the original outputs, ram usage: 19.3 +25-08-28 07:12:09 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:12:09 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:12:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:12:09 | D | - sum error = [ 3331.5475, 3232.1355, 3168.9620, 3098.2573, 3027.5870] +25-08-28 07:12:09 | D | - best error = [ 3331.5475, 3232.1355, 3168.9620, 3098.2573, 3027.5870] +25-08-28 07:12:09 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:12:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:12:09 | D | - sum error = [ 3022.0558, 3005.2968, 3025.8967, 3055.2677, 3156.2091] +25-08-28 07:12:09 | D | - best error = [ 3022.0558, 3005.2968, 3005.2968, 3005.2968, 3005.2968] +25-08-28 07:12:09 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:12:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:12:09 | D | - sum error = [ 3263.9164, 3458.3231, 3712.3696, 4036.7882, 4388.9192] +25-08-28 07:12:09 | D | - best error = [ 3005.2968, 3005.2968, 3005.2968, 3005.2968, 3005.2968] +25-08-28 07:12:09 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:12:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:12:09 | D | - sum error = [ 4902.2782, 5530.4126, 6424.5376, 7279.1651, 8260.6285] +25-08-28 07:12:09 | D | - best error = [ 3005.2968, 3005.2968, 3005.2968, 3005.2968, 3005.2968] +25-08-28 07:12:09 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:12:09 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:12:09 | D | - sum error = [ 3814.4030, 3675.8618, 3529.2462, 3429.8761, 3368.2862] +25-08-28 07:12:09 | D | - best error = [ 3005.2968, 3005.2968, 3005.2968, 3005.2968, 3005.2968] +25-08-28 07:12:09 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:12:09 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:12:09 | D | - sum error = [ 3299.3689, 3260.5213, 3263.0245, 3307.7622, 3371.8536] +25-08-28 07:12:09 | D | - best error = [ 3005.2968, 3005.2968, 3005.2968, 3005.2968, 3005.2968] +25-08-28 07:12:09 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:12:09 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:12:09 | D | - sum error = [ 3511.6038, 3741.6873, 4010.3339, 4372.6758, 4859.3691] +25-08-28 07:12:09 | D | - best error = [ 3005.2968, 3005.2968, 3005.2968, 3005.2968, 3005.2968] +25-08-28 07:12:09 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:12:09 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:12:09 | D | - sum error = [ 5499.7771, 6446.4694, 7305.9932, 8207.2948] +25-08-28 07:12:09 | D | - best error = [ 3005.2968, 3005.2968, 3005.2968, 3005.2968] +25-08-28 07:12:09 | D | + error = 3005.2968 +25-08-28 07:12:09 | D | + scale = [min=0.5896, max=3.2433] +25-08-28 07:12:28 | D | - Smoothing Diffusion Block transformer_blocks.1 +25-08-28 07:12:28 | D | - Skipping Module transformer_blocks.1.norm1.linear +25-08-28 07:12:28 | D | - Skipping Module transformer_blocks.1.norm1_context.linear +25-08-28 07:12:28 | D | - Smoothing Transformer Block transformer_blocks.1 +25-08-28 07:12:28 | D | - transformer_blocks.1.attn.qkv_proj +25-08-28 07:12:28 | D | + w: sint4 +25-08-28 07:12:28 | D | + x: sint4 +25-08-28 07:12:28 | D | + y: None +25-08-28 07:12:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:12:28 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:12:28 | D | + x - AbsMax +25-08-28 07:12:28 | D | + x = [min=0.0439, max=19.7500] +25-08-28 07:12:28 | D | + w - AbsMax +25-08-28 07:12:28 | D | + w = [min=0.0923, max=1.0000] +25-08-28 07:12:28 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:12:30 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:13:45 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:13:45 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:13:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:13:45 | D | - sum error = [ 2399.4045, 2186.5744, 2000.7996, 1772.6597, 1663.3081] +25-08-28 07:13:45 | D | - best error = [ 2399.4045, 2186.5744, 2000.7996, 1772.6597, 1663.3081] +25-08-28 07:13:45 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:13:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:13:45 | D | - sum error = [ 1545.3834, 1444.0149, 1368.9006, 1323.2725, 1335.7525] +25-08-28 07:13:45 | D | - best error = [ 1545.3834, 1444.0149, 1368.9006, 1323.2725, 1323.2725] +25-08-28 07:13:45 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:13:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:13:45 | D | - sum error = [ 1430.0206, 1534.9777, 1627.0890, 1866.0760, 2108.3677] +25-08-28 07:13:45 | D | - best error = [ 1323.2725, 1323.2725, 1323.2725, 1323.2725, 1323.2725] +25-08-28 07:13:45 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:13:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:13:45 | D | - sum error = [ 2305.0794, 2489.5765, 2835.0194, 3114.7981, 3450.1333] +25-08-28 07:13:45 | D | - best error = [ 1323.2725, 1323.2725, 1323.2725, 1323.2725, 1323.2725] +25-08-28 07:13:45 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:13:45 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:13:45 | D | - sum error = [ 6997.1097, 6122.5883, 5515.2255, 4514.6384, 3602.8981] +25-08-28 07:13:45 | D | - best error = [ 1323.2725, 1323.2725, 1323.2725, 1323.2725, 1323.2725] +25-08-28 07:13:45 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:13:45 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:13:45 | D | - sum error = [ 3316.9601, 3258.0531, 2847.2366, 2735.3508, 2790.8749] +25-08-28 07:13:45 | D | - best error = [ 1323.2725, 1323.2725, 1323.2725, 1323.2725, 1323.2725] +25-08-28 07:13:45 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:13:45 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:13:45 | D | - sum error = [ 2597.4132, 2463.9083, 2434.2038, 2562.9386, 2807.7410] +25-08-28 07:13:45 | D | - best error = [ 1323.2725, 1323.2725, 1323.2725, 1323.2725, 1323.2725] +25-08-28 07:13:45 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:13:45 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:13:45 | D | - sum error = [ 3048.9870, 3207.2309, 3483.5901, 3674.0210] +25-08-28 07:13:45 | D | - best error = [ 1323.2725, 1323.2725, 1323.2725, 1323.2725] +25-08-28 07:13:45 | D | + error = 1323.2725 +25-08-28 07:13:45 | D | + scale = [min=0.2865, max=3.2978] +25-08-28 07:13:45 | D | - transformer_blocks.1.attn add_qkv_proj +25-08-28 07:13:45 | D | + w: sint4 +25-08-28 07:13:45 | D | + x: sint4 +25-08-28 07:13:45 | D | + y: None +25-08-28 07:13:45 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:13:45 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:13:45 | D | + x - AbsMax +25-08-28 07:13:45 | D | + x = [min=0.1035, max=39.2500] +25-08-28 07:13:45 | D | + w - AbsMax +25-08-28 07:13:45 | D | + w = [min=0.1167, max=0.3828] +25-08-28 07:13:45 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:13:46 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:14:51 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:14:51 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:14:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:14:51 | D | - sum error = [ 1947.1986, 1722.6493, 1587.3023, 1470.3775, 1389.1828] +25-08-28 07:14:51 | D | - best error = [ 1947.1986, 1722.6493, 1587.3023, 1470.3775, 1389.1828] +25-08-28 07:14:51 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:14:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:14:51 | D | - sum error = [ 1302.6896, 1253.9466, 1178.4509, 1116.7519, 1113.4374] +25-08-28 07:14:51 | D | - best error = [ 1302.6896, 1253.9466, 1178.4509, 1116.7519, 1113.4374] +25-08-28 07:14:51 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:14:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:14:51 | D | - sum error = [ 1118.3685, 1164.3079, 1110.4045, 1161.2089, 1175.5136] +25-08-28 07:14:51 | D | - best error = [ 1113.4374, 1113.4374, 1110.4045, 1110.4045, 1110.4045] +25-08-28 07:14:51 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:14:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:14:51 | D | - sum error = [ 1198.1122, 1311.3872, 1350.5215, 1472.4669, 1622.5437] +25-08-28 07:14:51 | D | - best error = [ 1110.4045, 1110.4045, 1110.4045, 1110.4045, 1110.4045] +25-08-28 07:14:51 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:14:51 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:14:51 | D | - sum error = [ 2927.9102, 2707.5616, 2491.7504, 2311.6444, 2046.4489] +25-08-28 07:14:51 | D | - best error = [ 1110.4045, 1110.4045, 1110.4045, 1110.4045, 1110.4045] +25-08-28 07:14:51 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:14:51 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:14:51 | D | - sum error = [ 1837.7065, 1661.3549, 1540.5283, 1434.6683, 1388.5291] +25-08-28 07:14:51 | D | - best error = [ 1110.4045, 1110.4045, 1110.4045, 1110.4045, 1110.4045] +25-08-28 07:14:51 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:14:51 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:14:51 | D | - sum error = [ 1288.6711, 1286.9972, 1283.4531, 1290.8909, 1266.3116] +25-08-28 07:14:51 | D | - best error = [ 1110.4045, 1110.4045, 1110.4045, 1110.4045, 1110.4045] +25-08-28 07:14:51 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:14:51 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:14:51 | D | - sum error = [ 1336.8531, 1361.3068, 1491.1045, 1636.6531] +25-08-28 07:14:51 | D | - best error = [ 1110.4045, 1110.4045, 1110.4045, 1110.4045] +25-08-28 07:14:51 | D | + error = 1110.4045 +25-08-28 07:14:51 | D | + scale = [min=0.2565, max=9.0428] +25-08-28 07:14:51 | D | - transformer_blocks.1.attn.out_proj + transformer_blocks.1.attn.add_out_proj +25-08-28 07:14:51 | D | + w: sint4 +25-08-28 07:14:51 | D | + x: sint4 +25-08-28 07:14:51 | D | + y: None +25-08-28 07:14:51 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:14:51 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:14:52 | D | + x - AbsMax +25-08-28 07:14:52 | D | + x = [min=0.1787, max=7.1875] +25-08-28 07:14:52 | D | + w - AbsMax +25-08-28 07:14:52 | D | + w = [min=0.1064, max=0.4746] +25-08-28 07:14:52 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:14:53 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:16:44 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:16:44 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:16:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:16:44 | D | - sum error = [ 3402.3370, 3283.2029, 3167.1819, 3063.6697, 2977.6717] +25-08-28 07:16:44 | D | - best error = [ 3402.3370, 3283.2029, 3167.1819, 3063.6697, 2977.6717] +25-08-28 07:16:44 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:16:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:16:44 | D | - sum error = [ 2892.7322, 2861.7608, 2811.7338, 2810.0911, 2794.0065] +25-08-28 07:16:44 | D | - best error = [ 2892.7322, 2861.7608, 2811.7338, 2810.0911, 2794.0065] +25-08-28 07:16:44 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:16:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:16:44 | D | - sum error = [ 2792.6915, 2818.9872, 2863.9618, 2913.1703, 2978.8323] +25-08-28 07:16:44 | D | - best error = [ 2792.6915, 2792.6915, 2792.6915, 2792.6915, 2792.6915] +25-08-28 07:16:44 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:16:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:16:44 | D | - sum error = [ 3074.0747, 3186.8522, 3299.2335, 3411.6115, 3552.1934] +25-08-28 07:16:44 | D | - best error = [ 2792.6915, 2792.6915, 2792.6915, 2792.6915, 2792.6915] +25-08-28 07:16:44 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:16:44 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:16:44 | D | - sum error = [ 3975.8624, 3768.5565, 3577.9131, 3392.8700, 3256.6392] +25-08-28 07:16:44 | D | - best error = [ 2792.6915, 2792.6915, 2792.6915, 2792.6915, 2792.6915] +25-08-28 07:16:44 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:16:44 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:16:44 | D | - sum error = [ 3158.2009, 3056.1319, 2981.8714, 2932.3143, 2901.2327] +25-08-28 07:16:44 | D | - best error = [ 2792.6915, 2792.6915, 2792.6915, 2792.6915, 2792.6915] +25-08-28 07:16:44 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:16:44 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:16:44 | D | - sum error = [ 2895.0910, 2924.2002, 2929.5879, 2983.4827, 3066.3651] +25-08-28 07:16:44 | D | - best error = [ 2792.6915, 2792.6915, 2792.6915, 2792.6915, 2792.6915] +25-08-28 07:16:44 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:16:44 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:16:44 | D | - sum error = [ 3162.4796, 3272.3463, 3375.9270, 3549.0348] +25-08-28 07:16:44 | D | - best error = [ 2792.6915, 2792.6915, 2792.6915, 2792.6915] +25-08-28 07:16:44 | D | + error = 2792.6915 +25-08-28 07:16:44 | D | + scale = [min=0.4227, max=2.6810] +25-08-28 07:16:45 | D | - transformer_blocks.1.ff.up_proj +25-08-28 07:16:45 | D | + w: sint4 +25-08-28 07:16:45 | D | + x: sint4 +25-08-28 07:16:45 | D | + y: None +25-08-28 07:16:45 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:16:45 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:16:45 | D | + x - AbsMax +25-08-28 07:16:45 | D | + x = [min=0.0184, max=12.1250] +25-08-28 07:16:45 | D | + w - AbsMax +25-08-28 07:16:45 | D | + w = [min=0.0309, max=0.4727] +25-08-28 07:16:45 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:16:47 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:18:03 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:18:03 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:18:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:18:03 | D | - sum error = [ 1850.8953, 1767.9138, 1698.5849, 1635.1101, 1581.1536] +25-08-28 07:18:03 | D | - best error = [ 1850.8953, 1767.9138, 1698.5849, 1635.1101, 1581.1536] +25-08-28 07:18:03 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:18:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:18:03 | D | - sum error = [ 1528.9167, 1486.6664, 1450.4822, 1422.6221, 1402.3261] +25-08-28 07:18:03 | D | - best error = [ 1528.9167, 1486.6664, 1450.4822, 1422.6221, 1402.3261] +25-08-28 07:18:03 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:18:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:18:03 | D | - sum error = [ 1387.1096, 1379.0345, 1377.8466, 1382.8698, 1391.4576] +25-08-28 07:18:03 | D | - best error = [ 1387.1096, 1379.0345, 1377.8466, 1377.8466, 1377.8466] +25-08-28 07:18:03 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:18:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:18:03 | D | - sum error = [ 1404.4667, 1424.9468, 1460.6326, 1514.5304, 1597.6825] +25-08-28 07:18:03 | D | - best error = [ 1377.8466, 1377.8466, 1377.8466, 1377.8466, 1377.8466] +25-08-28 07:18:03 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:18:03 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:18:03 | D | - sum error = [ 2737.5543, 2440.0805, 2236.2255, 2085.4859, 1959.3410] +25-08-28 07:18:03 | D | - best error = [ 1377.8466, 1377.8466, 1377.8466, 1377.8466, 1377.8466] +25-08-28 07:18:03 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:18:03 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:18:03 | D | - sum error = [ 1852.0516, 1750.3650, 1663.8161, 1592.2231, 1532.5021] +25-08-28 07:18:03 | D | - best error = [ 1377.8466, 1377.8466, 1377.8466, 1377.8466, 1377.8466] +25-08-28 07:18:03 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:18:03 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:18:03 | D | - sum error = [ 1485.0456, 1452.5220, 1429.6341, 1420.3060, 1422.2789] +25-08-28 07:18:03 | D | - best error = [ 1377.8466, 1377.8466, 1377.8466, 1377.8466, 1377.8466] +25-08-28 07:18:03 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:18:03 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:18:03 | D | - sum error = [ 1433.8429, 1455.4521, 1499.5455, 1579.3205] +25-08-28 07:18:03 | D | - best error = [ 1377.8466, 1377.8466, 1377.8466, 1377.8466] +25-08-28 07:18:03 | D | + error = 1377.8466 +25-08-28 07:18:03 | D | + scale = [min=0.0911, max=4.4690] +25-08-28 07:18:04 | D | - transformer_blocks.1.ff.down_proj +25-08-28 07:18:04 | D | + w: sint4 +25-08-28 07:18:04 | D | + x: uint4 +25-08-28 07:18:04 | D | + y: None +25-08-28 07:18:04 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:18:04 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:18:06 | D | + x - AbsMax +25-08-28 07:18:06 | D | + x = [min=0.2383, max=10.0625] +25-08-28 07:18:06 | D | + w - AbsMax +25-08-28 07:18:06 | D | + w = [min=0.0776, max=0.5977] +25-08-28 07:18:06 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:18:10 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:20:05 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:20:05 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:20:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:20:05 | D | - sum error = [ 3071.9342, 3045.1092, 3052.6244, 3051.4210, 3060.6712] +25-08-28 07:20:05 | D | - best error = [ 3071.9342, 3045.1092, 3045.1092, 3045.1092, 3045.1092] +25-08-28 07:20:05 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:20:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:20:05 | D | - sum error = [ 3033.6732, 3032.6324, 3012.5755, 2990.1095, 2991.3733] +25-08-28 07:20:05 | D | - best error = [ 3033.6732, 3032.6324, 3012.5755, 2990.1095, 2990.1095] +25-08-28 07:20:05 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:20:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:20:05 | D | - sum error = [ 3006.5331, 3038.4591, 3079.8362, 3103.7648, 3114.1726] +25-08-28 07:20:05 | D | - best error = [ 2990.1095, 2990.1095, 2990.1095, 2990.1095, 2990.1095] +25-08-28 07:20:05 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:20:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:20:05 | D | - sum error = [ 3154.6345, 3205.1159, 3229.4931, 3293.1465, 3319.0179] +25-08-28 07:20:05 | D | - best error = [ 2990.1095, 2990.1095, 2990.1095, 2990.1095, 2990.1095] +25-08-28 07:20:05 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:20:05 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:20:05 | D | - sum error = [ 3368.8053, 3294.4353, 3231.5504, 3183.2678, 3141.8347] +25-08-28 07:20:05 | D | - best error = [ 2990.1095, 2990.1095, 2990.1095, 2990.1095, 2990.1095] +25-08-28 07:20:05 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:20:05 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:20:05 | D | - sum error = [ 3113.6405, 3097.4845, 3098.7325, 3047.5055, 3016.9897] +25-08-28 07:20:05 | D | - best error = [ 2990.1095, 2990.1095, 2990.1095, 2990.1095, 2990.1095] +25-08-28 07:20:05 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:20:05 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:20:05 | D | - sum error = [ 3013.6640, 3026.0782, 3017.0658, 3057.5443, 3101.3539] +25-08-28 07:20:05 | D | - best error = [ 2990.1095, 2990.1095, 2990.1095, 2990.1095, 2990.1095] +25-08-28 07:20:05 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:20:05 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:20:05 | D | - sum error = [ 3108.3837, 3173.0244, 3215.7201, 3304.0870] +25-08-28 07:20:05 | D | - best error = [ 2990.1095, 2990.1095, 2990.1095, 2990.1095] +25-08-28 07:20:05 | D | + error = 2990.1095 +25-08-28 07:20:05 | D | + scale = [min=0.5634, max=2.5182] +25-08-28 07:20:05 | D | - transformer_blocks.1.ff_context.up_proj +25-08-28 07:20:05 | D | + w: sint4 +25-08-28 07:20:05 | D | + x: sint4 +25-08-28 07:20:05 | D | + y: None +25-08-28 07:20:05 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:20:05 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:20:05 | D | + x - AbsMax +25-08-28 07:20:05 | D | + x = [min=0.1338, max=68.5000] +25-08-28 07:20:05 | D | + w - AbsMax +25-08-28 07:20:05 | D | + w = [min=0.1069, max=0.4043] +25-08-28 07:20:05 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:20:05 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:20:50 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:20:50 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:20:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:20:50 | D | - sum error = [ 6879.3322, 6480.6345, 6097.1160, 5753.7158, 5421.1134] +25-08-28 07:20:50 | D | - best error = [ 6879.3322, 6480.6345, 6097.1160, 5753.7158, 5421.1134] +25-08-28 07:20:50 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:20:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:20:50 | D | - sum error = [ 5129.5592, 4855.5945, 4606.0734, 4404.5672, 4219.1246] +25-08-28 07:20:50 | D | - best error = [ 5129.5592, 4855.5945, 4606.0734, 4404.5672, 4219.1246] +25-08-28 07:20:50 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:20:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:20:50 | D | - sum error = [ 4106.8923, 3969.5026, 3897.0232, 3836.9639, 3803.2576] +25-08-28 07:20:50 | D | - best error = [ 4106.8923, 3969.5026, 3897.0232, 3836.9639, 3803.2576] +25-08-28 07:20:50 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:20:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:20:50 | D | - sum error = [ 3809.5330, 3846.0634, 3923.9764, 3981.0509, 4133.9105] +25-08-28 07:20:50 | D | - best error = [ 3803.2576, 3803.2576, 3803.2576, 3803.2576, 3803.2576] +25-08-28 07:20:50 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:20:50 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:20:50 | D | - sum error = [ 6799.6082, 6389.7571, 5993.3787, 5648.9482, 5329.2720] +25-08-28 07:20:50 | D | - best error = [ 3803.2576, 3803.2576, 3803.2576, 3803.2576, 3803.2576] +25-08-28 07:20:50 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:20:50 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:20:50 | D | - sum error = [ 5029.3980, 4766.6697, 4511.6163, 4326.1754, 4166.3677] +25-08-28 07:20:50 | D | - best error = [ 3803.2576, 3803.2576, 3803.2576, 3803.2576, 3803.2576] +25-08-28 07:20:50 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:20:50 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:20:50 | D | - sum error = [ 4028.1861, 3929.6043, 3864.1233, 3819.4412, 3833.0493] +25-08-28 07:20:50 | D | - best error = [ 3803.2576, 3803.2576, 3803.2576, 3803.2576, 3803.2576] +25-08-28 07:20:50 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:20:50 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:20:50 | D | - sum error = [ 3834.0609, 3897.3741, 3973.8298, 4127.8437] +25-08-28 07:20:50 | D | - best error = [ 3803.2576, 3803.2576, 3803.2576, 3803.2576] +25-08-28 07:20:50 | D | + error = 3803.2576 +25-08-28 07:20:50 | D | + scale = [min=0.2446, max=19.2745] +25-08-28 07:20:50 | D | - transformer_blocks.1.ff_context.down_proj +25-08-28 07:20:50 | D | + w: sint4 +25-08-28 07:20:50 | D | + x: uint4 +25-08-28 07:20:50 | D | + y: None +25-08-28 07:20:50 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:20:50 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:20:50 | D | + x - AbsMax +25-08-28 07:20:50 | D | + x = [min=0.1719, max=86.5000] +25-08-28 07:20:50 | D | + w - AbsMax +25-08-28 07:20:50 | D | + w = [min=0.0295, max=0.7656] +25-08-28 07:20:50 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:20:51 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:21:39 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:21:39 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:21:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:21:39 | D | - sum error = [ 4833.3959, 4572.3880, 4347.3962, 4171.6512, 4088.5765] +25-08-28 07:21:39 | D | - best error = [ 4833.3959, 4572.3880, 4347.3962, 4171.6512, 4088.5765] +25-08-28 07:21:39 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:21:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:21:39 | D | - sum error = [ 4091.3653, 5031.1826, 7133.8537, 10520.6988, 14804.5906] +25-08-28 07:21:39 | D | - best error = [ 4088.5765, 4088.5765, 4088.5765, 4088.5765, 4088.5765] +25-08-28 07:21:39 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:21:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:21:39 | D | - sum error = [19281.9974, 23343.8520, 26359.1672, 28475.2715, 29762.4246] +25-08-28 07:21:39 | D | - best error = [ 4088.5765, 4088.5765, 4088.5765, 4088.5765, 4088.5765] +25-08-28 07:21:39 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:21:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:21:39 | D | - sum error = [30578.9774, 31167.0565, 31362.6499, 31519.0903, 31769.3417] +25-08-28 07:21:39 | D | - best error = [ 4088.5765, 4088.5765, 4088.5765, 4088.5765, 4088.5765] +25-08-28 07:21:39 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:21:39 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:21:39 | D | - sum error = [ 6551.3040, 5716.2082, 5140.3438, 4701.4223, 4382.3311] +25-08-28 07:21:39 | D | - best error = [ 4088.5765, 4088.5765, 4088.5765, 4088.5765, 4088.5765] +25-08-28 07:21:39 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:21:39 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:21:39 | D | - sum error = [ 4221.0179, 4016.5635, 4085.3600, 4769.0529, 7162.6420] +25-08-28 07:21:39 | D | - best error = [ 4088.5765, 4016.5635, 4016.5635, 4016.5635, 4016.5635] +25-08-28 07:21:39 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:21:39 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:21:39 | D | - sum error = [12113.2285, 18145.3086, 23750.7524, 27611.3055, 29743.6321] +25-08-28 07:21:39 | D | - best error = [ 4016.5635, 4016.5635, 4016.5635, 4016.5635, 4016.5635] +25-08-28 07:21:39 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:21:39 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:21:39 | D | - sum error = [30786.8876, 31193.3080, 31427.5304, 31737.9682] +25-08-28 07:21:39 | D | - best error = [ 4016.5635, 4016.5635, 4016.5635, 4016.5635] +25-08-28 07:21:39 | D | + error = 4016.5635 +25-08-28 07:21:39 | D | + scale = [min=0.6423, max=12.4354] +25-08-28 07:21:59 | D | - Smoothing Diffusion Block transformer_blocks.2 +25-08-28 07:21:59 | D | - Skipping Module transformer_blocks.2.norm1.linear +25-08-28 07:21:59 | D | - Skipping Module transformer_blocks.2.norm1_context.linear +25-08-28 07:21:59 | D | - Smoothing Transformer Block transformer_blocks.2 +25-08-28 07:21:59 | D | - transformer_blocks.2.attn.qkv_proj +25-08-28 07:21:59 | D | + w: sint4 +25-08-28 07:21:59 | D | + x: sint4 +25-08-28 07:21:59 | D | + y: None +25-08-28 07:21:59 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:21:59 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:22:00 | D | + x - AbsMax +25-08-28 07:22:00 | D | + x = [min=0.0201, max=11.5000] +25-08-28 07:22:00 | D | + w - AbsMax +25-08-28 07:22:00 | D | + w = [min=0.0923, max=0.9727] +25-08-28 07:22:00 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:22:01 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:23:13 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:23:13 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:23:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:23:13 | D | - sum error = [ 3255.7073, 3232.4653, 2976.5996, 2860.6190, 2746.9445] +25-08-28 07:23:13 | D | - best error = [ 3255.7073, 3232.4653, 2976.5996, 2860.6190, 2746.9445] +25-08-28 07:23:13 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:23:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:23:13 | D | - sum error = [ 2587.1218, 2559.9523, 2612.2622, 2677.0910, 2769.5149] +25-08-28 07:23:13 | D | - best error = [ 2587.1218, 2559.9523, 2559.9523, 2559.9523, 2559.9523] +25-08-28 07:23:13 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:23:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:23:13 | D | - sum error = [ 2872.5034, 3065.7880, 3335.9282, 3381.1937, 3646.5654] +25-08-28 07:23:13 | D | - best error = [ 2559.9523, 2559.9523, 2559.9523, 2559.9523, 2559.9523] +25-08-28 07:23:13 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:23:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:23:13 | D | - sum error = [ 3815.7828, 4075.5682, 4397.7187, 4564.0160, 4762.7500] +25-08-28 07:23:13 | D | - best error = [ 2559.9523, 2559.9523, 2559.9523, 2559.9523, 2559.9523] +25-08-28 07:23:13 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:23:13 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:23:13 | D | - sum error = [10880.0831, 8414.5997, 7373.1128, 6697.7357, 6256.7870] +25-08-28 07:23:13 | D | - best error = [ 2559.9523, 2559.9523, 2559.9523, 2559.9523, 2559.9523] +25-08-28 07:23:13 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:23:13 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:23:13 | D | - sum error = [ 5821.1607, 5491.2691, 5255.7820, 4884.1699, 4500.9539] +25-08-28 07:23:13 | D | - best error = [ 2559.9523, 2559.9523, 2559.9523, 2559.9523, 2559.9523] +25-08-28 07:23:13 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:23:13 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:23:13 | D | - sum error = [ 4462.7897, 4299.0566, 4362.9094, 4471.8777, 4641.7734] +25-08-28 07:23:13 | D | - best error = [ 2559.9523, 2559.9523, 2559.9523, 2559.9523, 2559.9523] +25-08-28 07:23:13 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:23:13 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:23:13 | D | - sum error = [ 4807.7845, 4850.1635, 4813.3485, 4828.6959] +25-08-28 07:23:13 | D | - best error = [ 2559.9523, 2559.9523, 2559.9523, 2559.9523] +25-08-28 07:23:13 | D | + error = 2559.9523 +25-08-28 07:23:13 | D | + scale = [min=0.3099, max=2.0807] +25-08-28 07:23:13 | D | - transformer_blocks.2.attn add_qkv_proj +25-08-28 07:23:13 | D | + w: sint4 +25-08-28 07:23:13 | D | + x: sint4 +25-08-28 07:23:13 | D | + y: None +25-08-28 07:23:13 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:23:13 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:23:13 | D | + x - AbsMax +25-08-28 07:23:13 | D | + x = [min=0.0469, max=34.0000] +25-08-28 07:23:13 | D | + w - AbsMax +25-08-28 07:23:13 | D | + w = [min=0.1064, max=0.6797] +25-08-28 07:23:13 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:23:14 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:24:17 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:24:17 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:24:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:24:17 | D | - sum error = [ 1393.7083, 1219.9658, 1177.5527, 1097.6303, 1014.2541] +25-08-28 07:24:17 | D | - best error = [ 1393.7083, 1219.9658, 1177.5527, 1097.6303, 1014.2541] +25-08-28 07:24:17 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:24:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:24:17 | D | - sum error = [ 867.3109, 834.4361, 855.6369, 728.2153, 700.1316] +25-08-28 07:24:17 | D | - best error = [ 867.3109, 834.4361, 834.4361, 728.2153, 700.1316] +25-08-28 07:24:17 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:24:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:24:17 | D | - sum error = [ 686.0557, 660.6542, 678.3373, 694.5146, 737.6446] +25-08-28 07:24:17 | D | - best error = [ 686.0557, 660.6542, 660.6542, 660.6542, 660.6542] +25-08-28 07:24:17 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:24:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:24:17 | D | - sum error = [ 853.8489, 939.9250, 1187.2548, 1416.8780, 1800.6278] +25-08-28 07:24:17 | D | - best error = [ 660.6542, 660.6542, 660.6542, 660.6542, 660.6542] +25-08-28 07:24:17 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:24:17 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:24:17 | D | - sum error = [ 2346.3166, 2058.3747, 2250.4023, 2166.3894, 2003.1385] +25-08-28 07:24:17 | D | - best error = [ 660.6542, 660.6542, 660.6542, 660.6542, 660.6542] +25-08-28 07:24:17 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:24:17 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:24:17 | D | - sum error = [ 1614.7085, 1490.7895, 1339.2030, 1169.8060, 1019.8832] +25-08-28 07:24:17 | D | - best error = [ 660.6542, 660.6542, 660.6542, 660.6542, 660.6542] +25-08-28 07:24:17 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:24:17 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:24:17 | D | - sum error = [ 918.6871, 882.8868, 889.3186, 874.5342, 886.9641] +25-08-28 07:24:17 | D | - best error = [ 660.6542, 660.6542, 660.6542, 660.6542, 660.6542] +25-08-28 07:24:17 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:24:17 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:24:17 | D | - sum error = [ 932.1047, 1247.0374, 1515.2303, 1779.5628] +25-08-28 07:24:17 | D | - best error = [ 660.6542, 660.6542, 660.6542, 660.6542] +25-08-28 07:24:17 | D | + error = 660.6542 +25-08-28 07:24:17 | D | + scale = [min=0.1858, max=6.9553] +25-08-28 07:24:17 | D | - transformer_blocks.2.attn.out_proj + transformer_blocks.2.attn.add_out_proj +25-08-28 07:24:17 | D | + w: sint4 +25-08-28 07:24:17 | D | + x: sint4 +25-08-28 07:24:17 | D | + y: None +25-08-28 07:24:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:24:17 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:24:18 | D | + x - AbsMax +25-08-28 07:24:18 | D | + x = [min=0.2217, max=11.1250] +25-08-28 07:24:18 | D | + w - AbsMax +25-08-28 07:24:18 | D | + w = [min=0.1064, max=0.5977] +25-08-28 07:24:18 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:24:19 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:26:08 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:26:08 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:26:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:26:08 | D | - sum error = [ 4021.5952, 3857.4603, 3724.7318, 3597.0997, 3508.5215] +25-08-28 07:26:08 | D | - best error = [ 4021.5952, 3857.4603, 3724.7318, 3597.0997, 3508.5215] +25-08-28 07:26:08 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:26:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:26:08 | D | - sum error = [ 3418.9816, 3337.5206, 3288.5766, 3244.0580, 3207.4287] +25-08-28 07:26:08 | D | - best error = [ 3418.9816, 3337.5206, 3288.5766, 3244.0580, 3207.4287] +25-08-28 07:26:08 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:26:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:26:08 | D | - sum error = [ 3196.2317, 3179.6706, 3195.4325, 3209.0472, 3246.9724] +25-08-28 07:26:08 | D | - best error = [ 3196.2317, 3179.6706, 3179.6706, 3179.6706, 3179.6706] +25-08-28 07:26:08 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:26:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:26:08 | D | - sum error = [ 3299.3718, 3353.0562, 3400.6467, 3490.0456, 3604.8515] +25-08-28 07:26:08 | D | - best error = [ 3179.6706, 3179.6706, 3179.6706, 3179.6706, 3179.6706] +25-08-28 07:26:08 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:26:08 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:26:08 | D | - sum error = [ 4625.2654, 4374.6312, 4155.4162, 3965.7741, 3789.9515] +25-08-28 07:26:08 | D | - best error = [ 3179.6706, 3179.6706, 3179.6706, 3179.6706, 3179.6706] +25-08-28 07:26:08 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:26:08 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:26:08 | D | - sum error = [ 3653.7880, 3532.7110, 3443.2845, 3356.4067, 3282.2654] +25-08-28 07:26:08 | D | - best error = [ 3179.6706, 3179.6706, 3179.6706, 3179.6706, 3179.6706] +25-08-28 07:26:08 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:26:08 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:26:08 | D | - sum error = [ 3254.9912, 3234.7674, 3219.5222, 3242.0145, 3265.9233] +25-08-28 07:26:08 | D | - best error = [ 3179.6706, 3179.6706, 3179.6706, 3179.6706, 3179.6706] +25-08-28 07:26:08 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:26:08 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:26:08 | D | - sum error = [ 3312.9535, 3382.3989, 3462.5521, 3586.8016] +25-08-28 07:26:08 | D | - best error = [ 3179.6706, 3179.6706, 3179.6706, 3179.6706] +25-08-28 07:26:08 | D | + error = 3179.6706 +25-08-28 07:26:08 | D | + scale = [min=0.4367, max=3.7624] +25-08-28 07:26:08 | D | - transformer_blocks.2.ff.up_proj +25-08-28 07:26:08 | D | + w: sint4 +25-08-28 07:26:08 | D | + x: sint4 +25-08-28 07:26:08 | D | + y: None +25-08-28 07:26:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:26:08 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:26:09 | D | + x - AbsMax +25-08-28 07:26:09 | D | + x = [min=0.0099, max=6.1562] +25-08-28 07:26:09 | D | + w - AbsMax +25-08-28 07:26:09 | D | + w = [min=0.0325, max=0.7617] +25-08-28 07:26:09 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:26:10 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:27:25 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:27:25 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:27:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:27:25 | D | - sum error = [ 1592.0360, 1543.7825, 1502.4532, 1466.7488, 1436.7557] +25-08-28 07:27:25 | D | - best error = [ 1592.0360, 1543.7825, 1502.4532, 1466.7488, 1436.7557] +25-08-28 07:27:25 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:27:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:27:25 | D | - sum error = [ 1412.4846, 1393.2723, 1378.6322, 1373.2930, 1365.9492] +25-08-28 07:27:25 | D | - best error = [ 1412.4846, 1393.2723, 1378.6322, 1373.2930, 1365.9492] +25-08-28 07:27:25 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:27:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:27:25 | D | - sum error = [ 1369.0309, 1373.7417, 1384.5652, 1396.0777, 1415.1651] +25-08-28 07:27:25 | D | - best error = [ 1365.9492, 1365.9492, 1365.9492, 1365.9492, 1365.9492] +25-08-28 07:27:25 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:27:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:27:25 | D | - sum error = [ 1448.4798, 1494.3463, 1573.4793, 1693.1091, 1865.4938] +25-08-28 07:27:25 | D | - best error = [ 1365.9492, 1365.9492, 1365.9492, 1365.9492, 1365.9492] +25-08-28 07:27:25 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:27:25 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:27:25 | D | - sum error = [ 2389.2187, 2184.8284, 2026.1901, 1911.5151, 1813.0125] +25-08-28 07:27:25 | D | - best error = [ 1365.9492, 1365.9492, 1365.9492, 1365.9492, 1365.9492] +25-08-28 07:27:25 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:27:25 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:27:25 | D | - sum error = [ 1731.3071, 1656.9283, 1593.4514, 1541.8109, 1499.7424] +25-08-28 07:27:25 | D | - best error = [ 1365.9492, 1365.9492, 1365.9492, 1365.9492, 1365.9492] +25-08-28 07:27:25 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:27:25 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:27:25 | D | - sum error = [ 1471.1334, 1450.6601, 1442.3555, 1441.0661, 1445.7936] +25-08-28 07:27:25 | D | - best error = [ 1365.9492, 1365.9492, 1365.9492, 1365.9492, 1365.9492] +25-08-28 07:27:25 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:27:25 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:27:25 | D | - sum error = [ 1470.4534, 1515.7860, 1618.9571, 1803.9979] +25-08-28 07:27:25 | D | - best error = [ 1365.9492, 1365.9492, 1365.9492, 1365.9492] +25-08-28 07:27:25 | D | + error = 1365.9492 +25-08-28 07:27:25 | D | + scale = [min=0.1253, max=2.2656] +25-08-28 07:27:25 | D | - transformer_blocks.2.ff.down_proj +25-08-28 07:27:25 | D | + w: sint4 +25-08-28 07:27:25 | D | + x: uint4 +25-08-28 07:27:25 | D | + y: None +25-08-28 07:27:25 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:27:25 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:27:27 | D | + x - AbsMax +25-08-28 07:27:27 | D | + x = [min=0.3203, max=8.0625] +25-08-28 07:27:27 | D | + w - AbsMax +25-08-28 07:27:27 | D | + w = [min=0.0752, max=0.5977] +25-08-28 07:27:27 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 07:27:29 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 07:29:17 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:29:17 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:29:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:29:17 | D | - sum error = [ 2873.7090, 2858.8460, 2872.5054, 2837.3250, 2828.7832] +25-08-28 07:29:17 | D | - best error = [ 2873.7090, 2858.8460, 2858.8460, 2837.3250, 2828.7832] +25-08-28 07:29:17 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:29:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:29:17 | D | - sum error = [ 2867.2304, 2868.2914, 2872.4831, 2905.5222, 2924.3146] +25-08-28 07:29:17 | D | - best error = [ 2828.7832, 2828.7832, 2828.7832, 2828.7832, 2828.7832] +25-08-28 07:29:17 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:29:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:29:17 | D | - sum error = [ 3010.8295, 3021.5340, 3171.8547, 3210.8443, 3269.3813] +25-08-28 07:29:17 | D | - best error = [ 2828.7832, 2828.7832, 2828.7832, 2828.7832, 2828.7832] +25-08-28 07:29:17 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:29:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:29:17 | D | - sum error = [ 3476.0380, 3571.0620, 3665.3120, 3965.1337, 4116.1105] +25-08-28 07:29:17 | D | - best error = [ 2828.7832, 2828.7832, 2828.7832, 2828.7832, 2828.7832] +25-08-28 07:29:17 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:29:17 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:29:17 | D | - sum error = [ 3668.0609, 3499.4237, 3268.0756, 3154.3939, 3037.3651] +25-08-28 07:29:17 | D | - best error = [ 2828.7832, 2828.7832, 2828.7832, 2828.7832, 2828.7832] +25-08-28 07:29:17 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:29:17 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:29:17 | D | - sum error = [ 2967.1242, 2895.8526, 2877.1536, 2899.8676, 2906.6407] +25-08-28 07:29:17 | D | - best error = [ 2828.7832, 2828.7832, 2828.7832, 2828.7832, 2828.7832] +25-08-28 07:29:17 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:29:17 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:29:17 | D | - sum error = [ 2943.3523, 2980.4053, 3076.4951, 3100.1013, 3171.8176] +25-08-28 07:29:17 | D | - best error = [ 2828.7832, 2828.7832, 2828.7832, 2828.7832, 2828.7832] +25-08-28 07:29:17 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:29:17 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:29:17 | D | - sum error = [ 3346.4767, 3535.2969, 3729.3926, 4115.6535] +25-08-28 07:29:17 | D | - best error = [ 2828.7832, 2828.7832, 2828.7832, 2828.7832] +25-08-28 07:29:17 | D | + error = 2828.7832 +25-08-28 07:29:17 | D | + scale = [min=0.7964, max=1.5181] +25-08-28 07:29:17 | D | - transformer_blocks.2.ff_context.up_proj +25-08-28 07:29:17 | D | + w: sint4 +25-08-28 07:29:17 | D | + x: sint4 +25-08-28 07:29:17 | D | + y: None +25-08-28 07:29:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:29:17 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 07:29:18 | D | + x - AbsMax +25-08-28 07:29:18 | D | + x = [min=0.1064, max=69.0000] +25-08-28 07:29:18 | D | + w - AbsMax +25-08-28 07:29:18 | D | + w = [min=0.1021, max=0.4824] +25-08-28 07:29:18 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 07:29:18 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 07:30:02 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:30:02 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:30:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:30:02 | D | - sum error = [ 4382.3726, 4143.1273, 3922.9093, 3728.3111, 3569.7455] +25-08-28 07:30:02 | D | - best error = [ 4382.3726, 4143.1273, 3922.9093, 3728.3111, 3569.7455] +25-08-28 07:30:02 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:30:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:30:02 | D | - sum error = [ 3442.9592, 3303.6176, 3184.3539, 3127.2360, 3040.4672] +25-08-28 07:30:02 | D | - best error = [ 3442.9592, 3303.6176, 3184.3539, 3127.2360, 3040.4672] +25-08-28 07:30:02 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:30:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:30:02 | D | - sum error = [ 2993.1731, 2986.9141, 2935.2238, 2925.4685, 2971.3457] +25-08-28 07:30:02 | D | - best error = [ 2993.1731, 2986.9141, 2935.2238, 2925.4685, 2925.4685] +25-08-28 07:30:02 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:30:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:30:02 | D | - sum error = [ 3022.1481, 3065.7265, 3159.2483, 3249.5166, 3377.0030] +25-08-28 07:30:02 | D | - best error = [ 2925.4685, 2925.4685, 2925.4685, 2925.4685, 2925.4685] +25-08-28 07:30:02 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:30:02 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:30:02 | D | - sum error = [ 4612.8601, 4344.4758, 4090.3961, 3875.1065, 3663.1182] +25-08-28 07:30:02 | D | - best error = [ 2925.4685, 2925.4685, 2925.4685, 2925.4685, 2925.4685] +25-08-28 07:30:02 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:30:02 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:30:02 | D | - sum error = [ 3508.1991, 3375.6131, 3234.3865, 3155.7329, 3082.3032] +25-08-28 07:30:02 | D | - best error = [ 2925.4685, 2925.4685, 2925.4685, 2925.4685, 2925.4685] +25-08-28 07:30:02 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:30:02 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:30:02 | D | - sum error = [ 3043.1549, 3011.9530, 2994.1780, 3002.5681, 3020.1695] +25-08-28 07:30:02 | D | - best error = [ 2925.4685, 2925.4685, 2925.4685, 2925.4685, 2925.4685] +25-08-28 07:30:02 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:30:02 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:30:02 | D | - sum error = [ 3066.5089, 3147.9536, 3250.3144, 3342.3049] +25-08-28 07:30:02 | D | - best error = [ 2925.4685, 2925.4685, 2925.4685, 2925.4685] +25-08-28 07:30:02 | D | + error = 2925.4685 +25-08-28 07:30:02 | D | + scale = [min=0.2331, max=15.6766] +25-08-28 07:30:03 | D | - transformer_blocks.2.ff_context.down_proj +25-08-28 07:30:03 | D | + w: sint4 +25-08-28 07:30:03 | D | + x: uint4 +25-08-28 07:30:03 | D | + y: None +25-08-28 07:30:03 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:30:03 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 07:30:03 | D | + x - AbsMax +25-08-28 07:30:03 | D | + x = [min=0.1719, max=31.6250] +25-08-28 07:30:03 | D | + w - AbsMax +25-08-28 07:30:03 | D | + w = [min=0.0228, max=0.6094] +25-08-28 07:30:03 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 07:30:04 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 07:30:52 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:30:52 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:30:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:30:52 | D | - sum error = [ 1495.3999, 1541.0375, 1670.1373, 1993.4158, 2608.0185] +25-08-28 07:30:52 | D | - best error = [ 1495.3999, 1495.3999, 1495.3999, 1495.3999, 1495.3999] +25-08-28 07:30:52 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:30:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:30:52 | D | - sum error = [ 3598.7267, 5158.4614, 6937.4554, 8898.1832, 10832.5273] +25-08-28 07:30:52 | D | - best error = [ 1495.3999, 1495.3999, 1495.3999, 1495.3999, 1495.3999] +25-08-28 07:30:52 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:30:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:30:52 | D | - sum error = [12828.2364, 15015.6596, 17557.0986, 20413.0452, 23273.0368] +25-08-28 07:30:52 | D | - best error = [ 1495.3999, 1495.3999, 1495.3999, 1495.3999, 1495.3999] +25-08-28 07:30:52 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:30:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:30:52 | D | - sum error = [25573.6752, 27026.1325, 28174.0601, 28849.7836, 29283.2195] +25-08-28 07:30:52 | D | - best error = [ 1495.3999, 1495.3999, 1495.3999, 1495.3999, 1495.3999] +25-08-28 07:30:52 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:30:52 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:30:52 | D | - sum error = [ 1550.3324, 1489.1033, 1494.8161, 1543.8007, 1653.0596] +25-08-28 07:30:52 | D | - best error = [ 1495.3999, 1489.1033, 1489.1033, 1489.1033, 1489.1033] +25-08-28 07:30:52 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:30:52 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:30:52 | D | - sum error = [ 1842.6770, 2125.7496, 2584.5978, 3608.1073, 5957.5116] +25-08-28 07:30:52 | D | - best error = [ 1489.1033, 1489.1033, 1489.1033, 1489.1033, 1489.1033] +25-08-28 07:30:52 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:30:52 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:30:52 | D | - sum error = [ 9676.0331, 13856.2898, 17960.1898, 21781.0178, 24661.3362] +25-08-28 07:30:52 | D | - best error = [ 1489.1033, 1489.1033, 1489.1033, 1489.1033, 1489.1033] +25-08-28 07:30:52 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:30:52 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:30:52 | D | - sum error = [26698.8203, 28015.0497, 28778.1473, 29268.7421] +25-08-28 07:30:52 | D | - best error = [ 1489.1033, 1489.1033, 1489.1033, 1489.1033] +25-08-28 07:30:52 | D | + error = 1489.1033 +25-08-28 07:30:52 | D | + scale = [min=1.4890, max=25.1718] +25-08-28 07:31:11 | D | - Smoothing Diffusion Block transformer_blocks.3 +25-08-28 07:31:11 | D | - Skipping Module transformer_blocks.3.norm1.linear +25-08-28 07:31:11 | D | - Skipping Module transformer_blocks.3.norm1_context.linear +25-08-28 07:31:11 | D | - Smoothing Transformer Block transformer_blocks.3 +25-08-28 07:31:11 | D | - transformer_blocks.3.attn.qkv_proj +25-08-28 07:31:11 | D | + w: sint4 +25-08-28 07:31:11 | D | + x: sint4 +25-08-28 07:31:11 | D | + y: None +25-08-28 07:31:11 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:31:11 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 07:31:11 | D | + x - AbsMax +25-08-28 07:31:11 | D | + x = [min=0.0233, max=12.5625] +25-08-28 07:31:11 | D | + w - AbsMax +25-08-28 07:31:11 | D | + w = [min=0.0923, max=1.1641] +25-08-28 07:31:11 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 07:31:12 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 07:32:24 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:32:24 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:32:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:32:24 | D | - sum error = [ 3502.3465, 3304.8703, 3033.7441, 2787.4202, 2647.7477] +25-08-28 07:32:24 | D | - best error = [ 3502.3465, 3304.8703, 3033.7441, 2787.4202, 2647.7477] +25-08-28 07:32:24 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:32:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:32:24 | D | - sum error = [ 2504.1753, 2454.9395, 2472.2904, 2553.5556, 2703.8879] +25-08-28 07:32:24 | D | - best error = [ 2504.1753, 2454.9395, 2454.9395, 2454.9395, 2454.9395] +25-08-28 07:32:24 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:32:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:32:24 | D | - sum error = [ 2874.0085, 3178.8597, 3475.6571, 3742.4934, 3958.7758] +25-08-28 07:32:24 | D | - best error = [ 2454.9395, 2454.9395, 2454.9395, 2454.9395, 2454.9395] +25-08-28 07:32:24 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:32:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:32:24 | D | - sum error = [ 4236.4197, 4537.4674, 4772.1693, 5086.9476, 5482.0255] +25-08-28 07:32:24 | D | - best error = [ 2454.9395, 2454.9395, 2454.9395, 2454.9395, 2454.9395] +25-08-28 07:32:24 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:32:24 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:32:24 | D | - sum error = [12367.5983, 11119.8842, 9939.2563, 8904.1739, 7999.9772] +25-08-28 07:32:24 | D | - best error = [ 2454.9395, 2454.9395, 2454.9395, 2454.9395, 2454.9395] +25-08-28 07:32:24 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:32:24 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:32:24 | D | - sum error = [ 6742.6968, 6093.8028, 5678.1647, 5390.3167, 5294.9300] +25-08-28 07:32:24 | D | - best error = [ 2454.9395, 2454.9395, 2454.9395, 2454.9395, 2454.9395] +25-08-28 07:32:24 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:32:24 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:32:24 | D | - sum error = [ 5292.4724, 5401.7303, 5187.4724, 5286.2949, 5288.0390] +25-08-28 07:32:24 | D | - best error = [ 2454.9395, 2454.9395, 2454.9395, 2454.9395, 2454.9395] +25-08-28 07:32:24 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:32:24 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:32:24 | D | - sum error = [ 5360.3994, 5405.1710, 5517.8048, 5717.8898] +25-08-28 07:32:24 | D | - best error = [ 2454.9395, 2454.9395, 2454.9395, 2454.9395] +25-08-28 07:32:24 | D | + error = 2454.9395 +25-08-28 07:32:24 | D | + scale = [min=0.3238, max=2.1366] +25-08-28 07:32:24 | D | - transformer_blocks.3.attn add_qkv_proj +25-08-28 07:32:24 | D | + w: sint4 +25-08-28 07:32:24 | D | + x: sint4 +25-08-28 07:32:24 | D | + y: None +25-08-28 07:32:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:32:24 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:32:24 | D | + x - AbsMax +25-08-28 07:32:24 | D | + x = [min=0.0825, max=23.6250] +25-08-28 07:32:24 | D | + w - AbsMax +25-08-28 07:32:24 | D | + w = [min=0.1060, max=0.4746] +25-08-28 07:32:24 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 07:32:25 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 07:33:28 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:33:28 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:33:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:33:28 | D | - sum error = [ 1630.8839, 1452.2658, 1379.9380, 1303.4525, 1241.0755] +25-08-28 07:33:28 | D | - best error = [ 1630.8839, 1452.2658, 1379.9380, 1303.4525, 1241.0755] +25-08-28 07:33:28 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:33:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:33:28 | D | - sum error = [ 1140.2467, 1113.9131, 1066.5994, 1037.1794, 1012.7432] +25-08-28 07:33:28 | D | - best error = [ 1140.2467, 1113.9131, 1066.5994, 1037.1794, 1012.7432] +25-08-28 07:33:28 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:33:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:33:28 | D | - sum error = [ 1014.1636, 1041.4269, 1047.2973, 1095.6682, 1126.2677] +25-08-28 07:33:28 | D | - best error = [ 1012.7432, 1012.7432, 1012.7432, 1012.7432, 1012.7432] +25-08-28 07:33:28 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:33:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:33:28 | D | - sum error = [ 1196.5033, 1234.3230, 1342.4711, 1435.1239, 1623.2772] +25-08-28 07:33:28 | D | - best error = [ 1012.7432, 1012.7432, 1012.7432, 1012.7432, 1012.7432] +25-08-28 07:33:28 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:33:28 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:33:28 | D | - sum error = [ 2380.3177, 2149.1284, 2021.0206, 1891.7620, 1807.1058] +25-08-28 07:33:28 | D | - best error = [ 1012.7432, 1012.7432, 1012.7432, 1012.7432, 1012.7432] +25-08-28 07:33:28 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:33:28 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:33:28 | D | - sum error = [ 1723.1519, 1625.7564, 1520.4537, 1436.2119, 1339.0039] +25-08-28 07:33:28 | D | - best error = [ 1012.7432, 1012.7432, 1012.7432, 1012.7432, 1012.7432] +25-08-28 07:33:28 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:33:28 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:33:28 | D | - sum error = [ 1325.9052, 1280.9547, 1286.4069, 1263.6573, 1299.9663] +25-08-28 07:33:28 | D | - best error = [ 1012.7432, 1012.7432, 1012.7432, 1012.7432, 1012.7432] +25-08-28 07:33:28 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:33:28 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:33:28 | D | - sum error = [ 1305.2336, 1356.4909, 1483.1884, 1612.8304] +25-08-28 07:33:28 | D | - best error = [ 1012.7432, 1012.7432, 1012.7432, 1012.7432] +25-08-28 07:33:28 | D | + error = 1012.7432 +25-08-28 07:33:28 | D | + scale = [min=0.3254, max=4.1497] +25-08-28 07:33:28 | D | - transformer_blocks.3.attn.out_proj + transformer_blocks.3.attn.add_out_proj +25-08-28 07:33:28 | D | + w: sint4 +25-08-28 07:33:28 | D | + x: sint4 +25-08-28 07:33:28 | D | + y: None +25-08-28 07:33:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:33:28 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 07:33:28 | D | + x - AbsMax +25-08-28 07:33:28 | D | + x = [min=0.3594, max=11.3125] +25-08-28 07:33:28 | D | + w - AbsMax +25-08-28 07:33:28 | D | + w = [min=0.1064, max=0.3906] +25-08-28 07:33:28 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 07:33:29 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 07:35:15 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:35:15 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:35:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:35:15 | D | - sum error = [ 4057.0152, 3965.7384, 3872.7171, 3791.7906, 3728.3629] +25-08-28 07:35:15 | D | - best error = [ 4057.0152, 3965.7384, 3872.7171, 3791.7906, 3728.3629] +25-08-28 07:35:15 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:35:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:35:15 | D | - sum error = [ 3667.2884, 3621.2387, 3594.0106, 3569.7049, 3555.6568] +25-08-28 07:35:15 | D | - best error = [ 3667.2884, 3621.2387, 3594.0106, 3569.7049, 3555.6568] +25-08-28 07:35:15 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:35:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:35:15 | D | - sum error = [ 3544.2370, 3569.8210, 3595.3011, 3627.8778, 3665.0917] +25-08-28 07:35:15 | D | - best error = [ 3544.2370, 3544.2370, 3544.2370, 3544.2370, 3544.2370] +25-08-28 07:35:15 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:35:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:35:15 | D | - sum error = [ 3715.2203, 3772.3112, 3858.1862, 3941.4623, 4040.9281] +25-08-28 07:35:15 | D | - best error = [ 3544.2370, 3544.2370, 3544.2370, 3544.2370, 3544.2370] +25-08-28 07:35:15 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:35:15 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:35:15 | D | - sum error = [ 4832.2469, 4642.0696, 4461.1288, 4293.3879, 4147.3051] +25-08-28 07:35:15 | D | - best error = [ 3544.2370, 3544.2370, 3544.2370, 3544.2370, 3544.2370] +25-08-28 07:35:15 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:35:15 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:35:15 | D | - sum error = [ 4022.1036, 3923.5386, 3828.2529, 3759.0142, 3717.4154] +25-08-28 07:35:15 | D | - best error = [ 3544.2370, 3544.2370, 3544.2370, 3544.2370, 3544.2370] +25-08-28 07:35:15 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:35:15 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:35:15 | D | - sum error = [ 3677.6749, 3660.7469, 3673.2889, 3689.8767, 3730.2369] +25-08-28 07:35:15 | D | - best error = [ 3544.2370, 3544.2370, 3544.2370, 3544.2370, 3544.2370] +25-08-28 07:35:15 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:35:15 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:35:15 | D | - sum error = [ 3767.2126, 3849.1745, 3927.4953, 4025.6106] +25-08-28 07:35:15 | D | - best error = [ 3544.2370, 3544.2370, 3544.2370, 3544.2370] +25-08-28 07:35:15 | D | + error = 3544.2370 +25-08-28 07:35:15 | D | + scale = [min=0.5995, max=3.3634] +25-08-28 07:35:15 | D | - transformer_blocks.3.ff.up_proj +25-08-28 07:35:15 | D | + w: sint4 +25-08-28 07:35:15 | D | + x: sint4 +25-08-28 07:35:15 | D | + y: None +25-08-28 07:35:15 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:35:15 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:35:16 | D | + x - AbsMax +25-08-28 07:35:16 | D | + x = [min=0.0281, max=12.5625] +25-08-28 07:35:16 | D | + w - AbsMax +25-08-28 07:35:16 | D | + w = [min=0.0552, max=0.5781] +25-08-28 07:35:16 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:35:17 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:36:32 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:36:32 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:36:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:36:32 | D | - sum error = [ 3909.5074, 3688.9493, 3497.6579, 3337.7785, 3202.1090] +25-08-28 07:36:32 | D | - best error = [ 3909.5074, 3688.9493, 3497.6579, 3337.7785, 3202.1090] +25-08-28 07:36:32 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:36:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:36:32 | D | - sum error = [ 3096.0452, 3009.4069, 2947.4240, 2906.3403, 2888.7072] +25-08-28 07:36:32 | D | - best error = [ 3096.0452, 3009.4069, 2947.4240, 2906.3403, 2888.7072] +25-08-28 07:36:32 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:36:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:36:32 | D | - sum error = [ 2903.8123, 2926.6361, 2987.3776, 3074.9151, 3204.6114] +25-08-28 07:36:32 | D | - best error = [ 2888.7072, 2888.7072, 2888.7072, 2888.7072, 2888.7072] +25-08-28 07:36:32 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:36:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:36:32 | D | - sum error = [ 3353.4995, 3570.1468, 3822.1073, 4136.8834, 4524.9139] +25-08-28 07:36:32 | D | - best error = [ 2888.7072, 2888.7072, 2888.7072, 2888.7072, 2888.7072] +25-08-28 07:36:32 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:36:32 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:36:32 | D | - sum error = [ 5587.0930, 5090.7952, 4705.6785, 4374.1253, 4060.2103] +25-08-28 07:36:32 | D | - best error = [ 2888.7072, 2888.7072, 2888.7072, 2888.7072, 2888.7072] +25-08-28 07:36:32 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:36:32 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:36:32 | D | - sum error = [ 3792.3179, 3561.4550, 3376.1779, 3238.0567, 3143.5744] +25-08-28 07:36:32 | D | - best error = [ 2888.7072, 2888.7072, 2888.7072, 2888.7072, 2888.7072] +25-08-28 07:36:32 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:36:32 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:36:32 | D | - sum error = [ 3084.2818, 3050.7947, 3066.5185, 3129.4364, 3236.1151] +25-08-28 07:36:32 | D | - best error = [ 2888.7072, 2888.7072, 2888.7072, 2888.7072, 2888.7072] +25-08-28 07:36:32 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:36:32 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:36:32 | D | - sum error = [ 3398.2875, 3638.3147, 3967.0881, 4410.3617] +25-08-28 07:36:32 | D | - best error = [ 2888.7072, 2888.7072, 2888.7072, 2888.7072] +25-08-28 07:36:32 | D | + error = 2888.7072 +25-08-28 07:36:32 | D | + scale = [min=0.2003, max=3.1231] +25-08-28 07:36:32 | D | - transformer_blocks.3.ff.down_proj +25-08-28 07:36:32 | D | + w: sint4 +25-08-28 07:36:32 | D | + x: uint4 +25-08-28 07:36:32 | D | + y: None +25-08-28 07:36:32 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:36:32 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:36:35 | D | + x - AbsMax +25-08-28 07:36:35 | D | + x = [min=0.3828, max=17.3750] +25-08-28 07:36:35 | D | + w - AbsMax +25-08-28 07:36:35 | D | + w = [min=0.0679, max=0.7656] +25-08-28 07:36:35 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:36:37 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:38:29 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:38:29 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:38:29 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:38:29 | D | - sum error = [ 3665.5172, 3586.6741, 3581.7469, 3631.9627, 3750.9388] +25-08-28 07:38:29 | D | - best error = [ 3665.5172, 3586.6741, 3581.7469, 3581.7469, 3581.7469] +25-08-28 07:38:29 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:38:29 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:38:29 | D | - sum error = [ 3919.7823, 4229.9995, 4625.4334, 5153.6230, 5902.5323] +25-08-28 07:38:29 | D | - best error = [ 3581.7469, 3581.7469, 3581.7469, 3581.7469, 3581.7469] +25-08-28 07:38:29 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:38:29 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:38:29 | D | - sum error = [ 6697.9887, 7769.2115, 8924.8471, 10281.1704, 11907.9987] +25-08-28 07:38:29 | D | - best error = [ 3581.7469, 3581.7469, 3581.7469, 3581.7469, 3581.7469] +25-08-28 07:38:29 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:38:29 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:38:29 | D | - sum error = [13658.8849, 15271.2949, 17231.4893, 19277.0114, 21487.5823] +25-08-28 07:38:29 | D | - best error = [ 3581.7469, 3581.7469, 3581.7469, 3581.7469, 3581.7469] +25-08-28 07:38:29 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:38:29 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:38:29 | D | - sum error = [ 5514.7236, 4923.0401, 4469.6523, 4154.2658, 3917.0722] +25-08-28 07:38:29 | D | - best error = [ 3581.7469, 3581.7469, 3581.7469, 3581.7469, 3581.7469] +25-08-28 07:38:29 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:38:29 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:38:29 | D | - sum error = [ 3788.1152, 3690.8362, 3745.1127, 3907.8114, 4241.6836] +25-08-28 07:38:29 | D | - best error = [ 3581.7469, 3581.7469, 3581.7469, 3581.7469, 3581.7469] +25-08-28 07:38:29 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:38:29 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:38:29 | D | - sum error = [ 4839.6527, 5685.6746, 6957.9710, 8475.8301, 10369.9481] +25-08-28 07:38:29 | D | - best error = [ 3581.7469, 3581.7469, 3581.7469, 3581.7469, 3581.7469] +25-08-28 07:38:29 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:38:29 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:38:29 | D | - sum error = [12605.0752, 14778.6256, 17438.4036, 20560.7050] +25-08-28 07:38:29 | D | - best error = [ 3581.7469, 3581.7469, 3581.7469, 3581.7469] +25-08-28 07:38:29 | D | + error = 3581.7469 +25-08-28 07:38:29 | D | + scale = [min=0.9084, max=1.3304] +25-08-28 07:38:30 | D | - transformer_blocks.3.ff_context.up_proj +25-08-28 07:38:30 | D | + w: sint4 +25-08-28 07:38:30 | D | + x: sint4 +25-08-28 07:38:30 | D | + y: None +25-08-28 07:38:30 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:38:30 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:38:30 | D | + x - AbsMax +25-08-28 07:38:30 | D | + x = [min=0.1025, max=13.5625] +25-08-28 07:38:30 | D | + w - AbsMax +25-08-28 07:38:30 | D | + w = [min=0.0957, max=0.5234] +25-08-28 07:38:30 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:38:30 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:39:14 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:39:14 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:39:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:39:14 | D | - sum error = [ 4444.6908, 4226.5083, 4013.4475, 3822.3293, 3660.1005] +25-08-28 07:39:14 | D | - best error = [ 4444.6908, 4226.5083, 4013.4475, 3822.3293, 3660.1005] +25-08-28 07:39:14 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:39:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:39:14 | D | - sum error = [ 3537.5927, 3425.3508, 3326.0310, 3259.4093, 3195.7005] +25-08-28 07:39:14 | D | - best error = [ 3537.5927, 3425.3508, 3326.0310, 3259.4093, 3195.7005] +25-08-28 07:39:14 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:39:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:39:14 | D | - sum error = [ 3169.0587, 3167.5025, 3191.3256, 3198.4076, 3224.1678] +25-08-28 07:39:14 | D | - best error = [ 3169.0587, 3167.5025, 3167.5025, 3167.5025, 3167.5025] +25-08-28 07:39:14 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:39:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:39:14 | D | - sum error = [ 3271.8666, 3348.2270, 3427.0657, 3548.7654, 3691.3481] +25-08-28 07:39:14 | D | - best error = [ 3167.5025, 3167.5025, 3167.5025, 3167.5025, 3167.5025] +25-08-28 07:39:14 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:39:14 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:39:14 | D | - sum error = [ 4447.8235, 4183.1082, 3955.5259, 3784.3651, 3582.1580] +25-08-28 07:39:14 | D | - best error = [ 3167.5025, 3167.5025, 3167.5025, 3167.5025, 3167.5025] +25-08-28 07:39:14 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:39:14 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:39:14 | D | - sum error = [ 3466.8790, 3370.8581, 3278.8779, 3212.6095, 3154.8819] +25-08-28 07:39:14 | D | - best error = [ 3167.5025, 3167.5025, 3167.5025, 3167.5025, 3154.8819] +25-08-28 07:39:14 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:39:14 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:39:14 | D | - sum error = [ 3139.7888, 3131.1059, 3118.6590, 3168.8528, 3207.1296] +25-08-28 07:39:14 | D | - best error = [ 3139.7888, 3131.1059, 3118.6590, 3118.6590, 3118.6590] +25-08-28 07:39:14 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:39:14 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:39:14 | D | - sum error = [ 3303.7439, 3401.2183, 3539.1796, 3681.6830] +25-08-28 07:39:14 | D | - best error = [ 3118.6590, 3118.6590, 3118.6590, 3118.6590] +25-08-28 07:39:14 | D | + error = 3118.6590 +25-08-28 07:39:14 | D | + scale = [min=0.4329, max=9.3879] +25-08-28 07:39:14 | D | - transformer_blocks.3.ff_context.down_proj +25-08-28 07:39:14 | D | + w: sint4 +25-08-28 07:39:14 | D | + x: uint4 +25-08-28 07:39:14 | D | + y: None +25-08-28 07:39:14 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:39:14 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:39:15 | D | + x - AbsMax +25-08-28 07:39:15 | D | + x = [min=0.1719, max=35.5000] +25-08-28 07:39:15 | D | + w - AbsMax +25-08-28 07:39:15 | D | + w = [min=0.0232, max=0.5352] +25-08-28 07:39:15 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:39:15 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:40:03 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:40:03 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:40:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:40:03 | D | - sum error = [ 1385.8769, 1490.3613, 1564.0483, 1756.6530, 2076.3872] +25-08-28 07:40:03 | D | - best error = [ 1385.8769, 1385.8769, 1385.8769, 1385.8769, 1385.8769] +25-08-28 07:40:03 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:40:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:40:03 | D | - sum error = [ 2548.3769, 3366.7875, 4463.0557, 5849.8099, 7425.7183] +25-08-28 07:40:03 | D | - best error = [ 1385.8769, 1385.8769, 1385.8769, 1385.8769, 1385.8769] +25-08-28 07:40:03 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:40:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:40:03 | D | - sum error = [ 9164.3566, 11348.2270, 14127.0657, 17551.1194, 21630.6538] +25-08-28 07:40:03 | D | - best error = [ 1385.8769, 1385.8769, 1385.8769, 1385.8769, 1385.8769] +25-08-28 07:40:03 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:40:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:40:03 | D | - sum error = [25648.5128, 29088.4862, 31638.7813, 33242.7330, 34397.7190] +25-08-28 07:40:03 | D | - best error = [ 1385.8769, 1385.8769, 1385.8769, 1385.8769, 1385.8769] +25-08-28 07:40:03 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:40:03 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:40:03 | D | - sum error = [ 1410.9276, 1388.2712, 1424.9396, 1518.6119, 1693.3034] +25-08-28 07:40:03 | D | - best error = [ 1385.8769, 1385.8769, 1385.8769, 1385.8769, 1385.8769] +25-08-28 07:40:03 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:40:03 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:40:03 | D | - sum error = [ 1923.2450, 2240.0087, 2619.4058, 3172.4896, 4646.6194] +25-08-28 07:40:03 | D | - best error = [ 1385.8769, 1385.8769, 1385.8769, 1385.8769, 1385.8769] +25-08-28 07:40:03 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:40:03 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:40:03 | D | - sum error = [ 7315.0863, 11070.8016, 15518.8051, 20350.7767, 24975.8129] +25-08-28 07:40:03 | D | - best error = [ 1385.8769, 1385.8769, 1385.8769, 1385.8769, 1385.8769] +25-08-28 07:40:03 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:40:03 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:40:03 | D | - sum error = [28679.9197, 31433.1321, 33173.8866, 34423.5956] +25-08-28 07:40:03 | D | - best error = [ 1385.8769, 1385.8769, 1385.8769, 1385.8769] +25-08-28 07:40:03 | D | + error = 1385.8769 +25-08-28 07:40:03 | D | + scale = [min=1.0000, max=1.0000] +25-08-28 07:40:23 | D | - Smoothing Diffusion Block transformer_blocks.4 +25-08-28 07:40:23 | D | - Skipping Module transformer_blocks.4.norm1.linear +25-08-28 07:40:23 | D | - Skipping Module transformer_blocks.4.norm1_context.linear +25-08-28 07:40:23 | D | - Smoothing Transformer Block transformer_blocks.4 +25-08-28 07:40:23 | D | - transformer_blocks.4.attn.qkv_proj +25-08-28 07:40:23 | D | + w: sint4 +25-08-28 07:40:23 | D | + x: sint4 +25-08-28 07:40:23 | D | + y: None +25-08-28 07:40:23 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:40:23 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:40:23 | D | + x - AbsMax +25-08-28 07:40:23 | D | + x = [min=0.0212, max=23.7500] +25-08-28 07:40:23 | D | + w - AbsMax +25-08-28 07:40:23 | D | + w = [min=0.0996, max=1.1797] +25-08-28 07:40:23 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:40:25 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:41:40 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:41:40 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:41:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:41:40 | D | - sum error = [ 4981.3370, 4644.2091, 4379.5315, 4139.3040, 3933.7975] +25-08-28 07:41:40 | D | - best error = [ 4981.3370, 4644.2091, 4379.5315, 4139.3040, 3933.7975] +25-08-28 07:41:40 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:41:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:41:40 | D | - sum error = [ 3803.0279, 3823.9062, 3799.4722, 3790.5821, 3952.2528] +25-08-28 07:41:40 | D | - best error = [ 3803.0279, 3803.0279, 3799.4722, 3790.5821, 3790.5821] +25-08-28 07:41:40 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:41:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:41:40 | D | - sum error = [ 4159.9651, 4403.7788, 4803.6759, 4967.6286, 5277.9164] +25-08-28 07:41:40 | D | - best error = [ 3790.5821, 3790.5821, 3790.5821, 3790.5821, 3790.5821] +25-08-28 07:41:40 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:41:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:41:40 | D | - sum error = [ 5834.5591, 6302.6052, 6806.9788, 7500.0949, 7904.8297] +25-08-28 07:41:40 | D | - best error = [ 3790.5821, 3790.5821, 3790.5821, 3790.5821, 3790.5821] +25-08-28 07:41:40 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:41:40 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:41:40 | D | - sum error = [14940.4900, 12974.2712, 12089.3302, 11066.9656, 10158.7842] +25-08-28 07:41:40 | D | - best error = [ 3790.5821, 3790.5821, 3790.5821, 3790.5821, 3790.5821] +25-08-28 07:41:40 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:41:40 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:41:40 | D | - sum error = [ 9456.0592, 8759.5374, 8260.4571, 7883.7499, 7827.1849] +25-08-28 07:41:40 | D | - best error = [ 3790.5821, 3790.5821, 3790.5821, 3790.5821, 3790.5821] +25-08-28 07:41:40 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:41:40 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:41:40 | D | - sum error = [ 7575.2288, 7726.5770, 7712.2375, 7802.1236, 7660.7864] +25-08-28 07:41:40 | D | - best error = [ 3790.5821, 3790.5821, 3790.5821, 3790.5821, 3790.5821] +25-08-28 07:41:40 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:41:40 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:41:40 | D | - sum error = [ 7715.7722, 7798.2626, 8134.2973, 8206.6339] +25-08-28 07:41:40 | D | - best error = [ 3790.5821, 3790.5821, 3790.5821, 3790.5821] +25-08-28 07:41:40 | D | + error = 3790.5821 +25-08-28 07:41:40 | D | + scale = [min=0.2142, max=3.5503] +25-08-28 07:41:41 | D | - transformer_blocks.4.attn add_qkv_proj +25-08-28 07:41:41 | D | + w: sint4 +25-08-28 07:41:41 | D | + x: sint4 +25-08-28 07:41:41 | D | + y: None +25-08-28 07:41:41 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:41:41 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:41:41 | D | + x - AbsMax +25-08-28 07:41:41 | D | + x = [min=0.0776, max=19.0000] +25-08-28 07:41:41 | D | + w - AbsMax +25-08-28 07:41:41 | D | + w = [min=0.1069, max=0.4785] +25-08-28 07:41:41 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:41:42 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:42:49 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:42:49 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:42:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:42:49 | D | - sum error = [ 1521.5600, 1446.3069, 1430.3541, 1309.5263, 1242.9320] +25-08-28 07:42:49 | D | - best error = [ 1521.5600, 1446.3069, 1430.3541, 1309.5263, 1242.9320] +25-08-28 07:42:49 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:42:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:42:49 | D | - sum error = [ 1218.9588, 1166.4055, 1106.2946, 1105.0714, 1090.2955] +25-08-28 07:42:49 | D | - best error = [ 1218.9588, 1166.4055, 1106.2946, 1105.0714, 1090.2955] +25-08-28 07:42:49 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:42:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:42:49 | D | - sum error = [ 1080.8769, 1068.7668, 1066.6812, 1102.7275, 1073.2697] +25-08-28 07:42:49 | D | - best error = [ 1080.8769, 1068.7668, 1066.6812, 1066.6812, 1066.6812] +25-08-28 07:42:49 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:42:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:42:49 | D | - sum error = [ 1161.4692, 1230.5712, 1285.9879, 1383.9670, 1573.7868] +25-08-28 07:42:49 | D | - best error = [ 1066.6812, 1066.6812, 1066.6812, 1066.6812, 1066.6812] +25-08-28 07:42:49 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:42:49 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:42:49 | D | - sum error = [ 2163.2528, 2089.9022, 1939.8403, 1849.6817, 1768.2068] +25-08-28 07:42:49 | D | - best error = [ 1066.6812, 1066.6812, 1066.6812, 1066.6812, 1066.6812] +25-08-28 07:42:49 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:42:49 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:42:49 | D | - sum error = [ 1681.8182, 1579.3971, 1518.9013, 1430.0738, 1394.0764] +25-08-28 07:42:49 | D | - best error = [ 1066.6812, 1066.6812, 1066.6812, 1066.6812, 1066.6812] +25-08-28 07:42:49 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:42:49 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:42:49 | D | - sum error = [ 1319.9630, 1287.5092, 1254.1553, 1265.6630, 1252.6001] +25-08-28 07:42:49 | D | - best error = [ 1066.6812, 1066.6812, 1066.6812, 1066.6812, 1066.6812] +25-08-28 07:42:49 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:42:49 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:42:49 | D | - sum error = [ 1275.9016, 1319.6969, 1411.6069, 1567.9429] +25-08-28 07:42:49 | D | - best error = [ 1066.6812, 1066.6812, 1066.6812, 1066.6812] +25-08-28 07:42:49 | D | + error = 1066.6812 +25-08-28 07:42:49 | D | + scale = [min=0.2158, max=5.8513] +25-08-28 07:42:50 | D | - transformer_blocks.4.attn.out_proj + transformer_blocks.4.attn.add_out_proj +25-08-28 07:42:50 | D | + w: sint4 +25-08-28 07:42:50 | D | + x: sint4 +25-08-28 07:42:50 | D | + y: None +25-08-28 07:42:50 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:42:50 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 07:42:50 | D | + x - AbsMax +25-08-28 07:42:50 | D | + x = [min=0.5664, max=12.1875] +25-08-28 07:42:50 | D | + w - AbsMax +25-08-28 07:42:50 | D | + w = [min=0.1191, max=0.3906] +25-08-28 07:42:50 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:42:52 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:44:40 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:44:40 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:44:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:44:40 | D | - sum error = [ 4536.3523, 4466.0103, 4398.7546, 4332.2555, 4282.8551] +25-08-28 07:44:40 | D | - best error = [ 4536.3523, 4466.0103, 4398.7546, 4332.2555, 4282.8551] +25-08-28 07:44:40 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:44:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:44:40 | D | - sum error = [ 4239.4304, 4211.8376, 4171.7797, 4158.1561, 4161.6879] +25-08-28 07:44:40 | D | - best error = [ 4239.4304, 4211.8376, 4171.7797, 4158.1561, 4158.1561] +25-08-28 07:44:40 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:44:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:44:40 | D | - sum error = [ 4161.0436, 4168.8169, 4194.9863, 4213.9343, 4250.1466] +25-08-28 07:44:40 | D | - best error = [ 4158.1561, 4158.1561, 4158.1561, 4158.1561, 4158.1561] +25-08-28 07:44:40 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:44:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:44:40 | D | - sum error = [ 4298.6677, 4339.3785, 4392.7042, 4468.3491, 4534.3736] +25-08-28 07:44:40 | D | - best error = [ 4158.1561, 4158.1561, 4158.1561, 4158.1561, 4158.1561] +25-08-28 07:44:40 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:44:40 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:44:40 | D | - sum error = [ 5413.4008, 5220.5879, 5041.0527, 4881.8427, 4749.1797] +25-08-28 07:44:40 | D | - best error = [ 4158.1561, 4158.1561, 4158.1561, 4158.1561, 4158.1561] +25-08-28 07:44:40 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:44:40 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:44:40 | D | - sum error = [ 4633.5047, 4519.8482, 4446.4057, 4377.8234, 4323.5518] +25-08-28 07:44:40 | D | - best error = [ 4158.1561, 4158.1561, 4158.1561, 4158.1561, 4158.1561] +25-08-28 07:44:40 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:44:40 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:44:40 | D | - sum error = [ 4284.2119, 4260.3648, 4254.3987, 4254.1157, 4280.8431] +25-08-28 07:44:40 | D | - best error = [ 4158.1561, 4158.1561, 4158.1561, 4158.1561, 4158.1561] +25-08-28 07:44:40 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:44:40 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:44:40 | D | - sum error = [ 4317.5329, 4376.7616, 4441.0598, 4522.2948] +25-08-28 07:44:40 | D | - best error = [ 4158.1561, 4158.1561, 4158.1561, 4158.1561] +25-08-28 07:44:40 | D | + error = 4158.1561 +25-08-28 07:44:40 | D | + scale = [min=0.7966, max=2.7187] +25-08-28 07:44:40 | D | - transformer_blocks.4.ff.up_proj +25-08-28 07:44:40 | D | + w: sint4 +25-08-28 07:44:40 | D | + x: sint4 +25-08-28 07:44:40 | D | + y: None +25-08-28 07:44:40 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:44:40 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:44:41 | D | + x - AbsMax +25-08-28 07:44:41 | D | + x = [min=0.0527, max=23.7500] +25-08-28 07:44:41 | D | + w - AbsMax +25-08-28 07:44:41 | D | + w = [min=0.0977, max=0.5586] +25-08-28 07:44:41 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:44:42 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:45:59 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:45:59 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:45:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:45:59 | D | - sum error = [11269.2254, 10672.3965, 10161.7235, 9696.0437, 9299.2038] +25-08-28 07:45:59 | D | - best error = [11269.2254, 10672.3965, 10161.7235, 9696.0437, 9299.2038] +25-08-28 07:45:59 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:45:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:45:59 | D | - sum error = [ 8949.1340, 8658.9602, 8436.6652, 8253.1187, 8122.2403] +25-08-28 07:45:59 | D | - best error = [ 8949.1340, 8658.9602, 8436.6652, 8253.1187, 8122.2403] +25-08-28 07:45:59 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:45:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:45:59 | D | - sum error = [ 8045.5441, 8001.3281, 8013.8318, 8086.1128, 8192.3833] +25-08-28 07:45:59 | D | - best error = [ 8045.5441, 8001.3281, 8001.3281, 8001.3281, 8001.3281] +25-08-28 07:45:59 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:45:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:45:59 | D | - sum error = [ 8355.5270, 8594.9469, 8933.7625, 9377.4393, 9981.3654] +25-08-28 07:45:59 | D | - best error = [ 8001.3281, 8001.3281, 8001.3281, 8001.3281, 8001.3281] +25-08-28 07:45:59 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:45:59 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:45:59 | D | - sum error = [13039.4491, 12109.8221, 11345.4770, 10676.6815, 10089.3742] +25-08-28 07:45:59 | D | - best error = [ 8001.3281, 8001.3281, 8001.3281, 8001.3281, 8001.3281] +25-08-28 07:45:59 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:45:59 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:45:59 | D | - sum error = [ 9606.3425, 9188.0065, 8830.6738, 8557.0981, 8355.1919] +25-08-28 07:45:59 | D | - best error = [ 8001.3281, 8001.3281, 8001.3281, 8001.3281, 8001.3281] +25-08-28 07:45:59 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:45:59 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:45:59 | D | - sum error = [ 8211.1860, 8157.3388, 8136.9139, 8195.4914, 8325.7514] +25-08-28 07:45:59 | D | - best error = [ 8001.3281, 8001.3281, 8001.3281, 8001.3281, 8001.3281] +25-08-28 07:45:59 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:45:59 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:45:59 | D | - sum error = [ 8525.8618, 8840.9334, 9269.7249, 9893.6727] +25-08-28 07:45:59 | D | - best error = [ 8001.3281, 8001.3281, 8001.3281, 8001.3281] +25-08-28 07:45:59 | D | + error = 8001.3281 +25-08-28 07:45:59 | D | + scale = [min=0.1982, max=5.7097] +25-08-28 07:46:00 | D | - transformer_blocks.4.ff.down_proj +25-08-28 07:46:00 | D | + w: sint4 +25-08-28 07:46:00 | D | + x: uint4 +25-08-28 07:46:00 | D | + y: None +25-08-28 07:46:00 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:46:00 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:46:03 | D | + x - AbsMax +25-08-28 07:46:03 | D | + x = [min=0.1719, max=23.1250] +25-08-28 07:46:03 | D | + w - AbsMax +25-08-28 07:46:03 | D | + w = [min=0.0967, max=1.0938] +25-08-28 07:46:03 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 07:46:06 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 07:48:02 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:48:02 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:48:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:48:02 | D | - sum error = [ 7964.8121, 7943.0246, 7918.4145, 7901.1063, 7955.8321] +25-08-28 07:48:02 | D | - best error = [ 7964.8121, 7943.0246, 7918.4145, 7901.1063, 7901.1063] +25-08-28 07:48:02 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:48:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:48:02 | D | - sum error = [ 8023.5541, 8124.4215, 8265.0992, 8416.1374, 8692.2803] +25-08-28 07:48:02 | D | - best error = [ 7901.1063, 7901.1063, 7901.1063, 7901.1063, 7901.1063] +25-08-28 07:48:02 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:48:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:48:02 | D | - sum error = [ 9126.6616, 9588.0288, 10260.7661, 11123.7166, 12245.3191] +25-08-28 07:48:02 | D | - best error = [ 7901.1063, 7901.1063, 7901.1063, 7901.1063, 7901.1063] +25-08-28 07:48:02 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:48:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:48:02 | D | - sum error = [13461.7592, 14611.2335, 15770.9830, 17434.1807, 19951.2654] +25-08-28 07:48:02 | D | - best error = [ 7901.1063, 7901.1063, 7901.1063, 7901.1063, 7901.1063] +25-08-28 07:48:02 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:48:02 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:48:02 | D | - sum error = [ 9503.8544, 9178.2591, 8909.7255, 8687.7799, 8528.9160] +25-08-28 07:48:02 | D | - best error = [ 7901.1063, 7901.1063, 7901.1063, 7901.1063, 7901.1063] +25-08-28 07:48:02 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:48:02 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:48:02 | D | - sum error = [ 8420.3648, 8390.3492, 8379.6730, 8460.8326, 8660.5686] +25-08-28 07:48:02 | D | - best error = [ 7901.1063, 7901.1063, 7901.1063, 7901.1063, 7901.1063] +25-08-28 07:48:02 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:48:02 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:48:02 | D | - sum error = [ 9023.8041, 9556.5605, 10273.8822, 11459.9881, 12676.6033] +25-08-28 07:48:02 | D | - best error = [ 7901.1063, 7901.1063, 7901.1063, 7901.1063, 7901.1063] +25-08-28 07:48:02 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:48:02 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:48:02 | D | - sum error = [13931.5194, 15184.9441, 17164.1934, 19639.3963] +25-08-28 07:48:02 | D | - best error = [ 7901.1063, 7901.1063, 7901.1063, 7901.1063] +25-08-28 07:48:02 | D | + error = 7901.1063 +25-08-28 07:48:02 | D | + scale = [min=0.7679, max=1.6018] +25-08-28 07:48:02 | D | - transformer_blocks.4.ff_context.up_proj +25-08-28 07:48:02 | D | + w: sint4 +25-08-28 07:48:02 | D | + x: sint4 +25-08-28 07:48:02 | D | + y: None +25-08-28 07:48:02 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:48:02 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 07:48:02 | D | + x - AbsMax +25-08-28 07:48:02 | D | + x = [min=0.1162, max=10.3750] +25-08-28 07:48:02 | D | + w - AbsMax +25-08-28 07:48:02 | D | + w = [min=0.0854, max=0.6484] +25-08-28 07:48:02 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 07:48:03 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 07:48:47 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:48:47 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:48:47 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:48:47 | D | - sum error = [ 4481.2119, 4265.2751, 4071.2851, 3877.5417, 3734.5994] +25-08-28 07:48:47 | D | - best error = [ 4481.2119, 4265.2751, 4071.2851, 3877.5417, 3734.5994] +25-08-28 07:48:47 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:48:47 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:48:47 | D | - sum error = [ 3585.7353, 3491.5442, 3402.9123, 3330.3363, 3276.3248] +25-08-28 07:48:47 | D | - best error = [ 3585.7353, 3491.5442, 3402.9123, 3330.3363, 3276.3248] +25-08-28 07:48:47 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:48:47 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:48:47 | D | - sum error = [ 3232.4669, 3216.8475, 3211.2289, 3214.3167, 3262.4161] +25-08-28 07:48:47 | D | - best error = [ 3232.4669, 3216.8475, 3211.2289, 3211.2289, 3211.2289] +25-08-28 07:48:47 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:48:47 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:48:47 | D | - sum error = [ 3285.4768, 3355.9131, 3444.2163, 3545.2640, 3684.0326] +25-08-28 07:48:47 | D | - best error = [ 3211.2289, 3211.2289, 3211.2289, 3211.2289, 3211.2289] +25-08-28 07:48:47 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:48:47 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:48:47 | D | - sum error = [ 4436.7612, 4177.4861, 3971.3255, 3797.7983, 3650.7412] +25-08-28 07:48:47 | D | - best error = [ 3211.2289, 3211.2289, 3211.2289, 3211.2289, 3211.2289] +25-08-28 07:48:47 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:48:47 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:48:47 | D | - sum error = [ 3505.3943, 3401.4942, 3312.0577, 3246.3435, 3188.8540] +25-08-28 07:48:47 | D | - best error = [ 3211.2289, 3211.2289, 3211.2289, 3211.2289, 3188.8540] +25-08-28 07:48:47 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:48:47 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:48:47 | D | - sum error = [ 3176.8658, 3164.2250, 3185.3334, 3204.8220, 3284.0888] +25-08-28 07:48:47 | D | - best error = [ 3176.8658, 3164.2250, 3164.2250, 3164.2250, 3164.2250] +25-08-28 07:48:47 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:48:47 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:48:47 | D | - sum error = [ 3334.8393, 3397.9616, 3530.4804, 3647.6390] +25-08-28 07:48:47 | D | - best error = [ 3164.2250, 3164.2250, 3164.2250, 3164.2250] +25-08-28 07:48:47 | D | + error = 3164.2250 +25-08-28 07:48:47 | D | + scale = [min=0.5142, max=9.2641] +25-08-28 07:48:47 | D | - transformer_blocks.4.ff_context.down_proj +25-08-28 07:48:47 | D | + w: sint4 +25-08-28 07:48:47 | D | + x: uint4 +25-08-28 07:48:47 | D | + y: None +25-08-28 07:48:47 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:48:47 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 07:48:47 | D | + x - AbsMax +25-08-28 07:48:47 | D | + x = [min=0.1719, max=36.7500] +25-08-28 07:48:47 | D | + w - AbsMax +25-08-28 07:48:47 | D | + w = [min=0.0227, max=0.5391] +25-08-28 07:48:47 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 07:48:48 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 07:49:37 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:49:37 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:49:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:49:37 | D | - sum error = [ 1532.3620, 1554.6297, 1638.2169, 1867.0913, 2236.5621] +25-08-28 07:49:37 | D | - best error = [ 1532.3620, 1532.3620, 1532.3620, 1532.3620, 1532.3620] +25-08-28 07:49:37 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:49:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:49:37 | D | - sum error = [ 2857.3739, 3866.4614, 5101.2259, 6569.7035, 8033.2678] +25-08-28 07:49:37 | D | - best error = [ 1532.3620, 1532.3620, 1532.3620, 1532.3620, 1532.3620] +25-08-28 07:49:37 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:49:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:49:37 | D | - sum error = [ 9521.5276, 11201.9002, 13184.1905, 15349.4464, 17529.0023] +25-08-28 07:49:37 | D | - best error = [ 1532.3620, 1532.3620, 1532.3620, 1532.3620, 1532.3620] +25-08-28 07:49:37 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:49:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:49:37 | D | - sum error = [19806.0904, 21936.3610, 23687.8365, 24797.8747, 25514.1649] +25-08-28 07:49:37 | D | - best error = [ 1532.3620, 1532.3620, 1532.3620, 1532.3620, 1532.3620] +25-08-28 07:49:37 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:49:37 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:49:37 | D | - sum error = [ 1479.7417, 1462.2195, 1495.3077, 1588.4614, 1774.2500] +25-08-28 07:49:37 | D | - best error = [ 1479.7417, 1462.2195, 1462.2195, 1462.2195, 1462.2195] +25-08-28 07:49:37 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:49:37 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:49:37 | D | - sum error = [ 2050.1967, 2296.9150, 2636.1007, 3405.4311, 4985.4011] +25-08-28 07:49:37 | D | - best error = [ 1462.2195, 1462.2195, 1462.2195, 1462.2195, 1462.2195] +25-08-28 07:49:37 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:49:37 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:49:37 | D | - sum error = [ 7643.6643, 10698.3621, 13859.8136, 16824.5383, 19521.6475] +25-08-28 07:49:37 | D | - best error = [ 1462.2195, 1462.2195, 1462.2195, 1462.2195, 1462.2195] +25-08-28 07:49:37 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:49:37 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:49:37 | D | - sum error = [21864.1098, 23733.6905, 24816.9155, 25499.0974] +25-08-28 07:49:37 | D | - best error = [ 1462.2195, 1462.2195, 1462.2195, 1462.2195] +25-08-28 07:49:37 | D | + error = 1462.2195 +25-08-28 07:49:37 | D | + scale = [min=1.6832, max=25.2936] +25-08-28 07:49:57 | D | - Smoothing Diffusion Block transformer_blocks.5 +25-08-28 07:49:57 | D | - Skipping Module transformer_blocks.5.norm1.linear +25-08-28 07:49:57 | D | - Skipping Module transformer_blocks.5.norm1_context.linear +25-08-28 07:49:57 | D | - Smoothing Transformer Block transformer_blocks.5 +25-08-28 07:49:57 | D | - transformer_blocks.5.attn.qkv_proj +25-08-28 07:49:57 | D | + w: sint4 +25-08-28 07:49:57 | D | + x: sint4 +25-08-28 07:49:57 | D | + y: None +25-08-28 07:49:57 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:49:57 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:49:57 | D | + x - AbsMax +25-08-28 07:49:57 | D | + x = [min=0.0381, max=18.5000] +25-08-28 07:49:57 | D | + w - AbsMax +25-08-28 07:49:57 | D | + w = [min=0.1069, max=0.8477] +25-08-28 07:49:57 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:49:58 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:51:15 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:51:15 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:51:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:51:15 | D | - sum error = [ 4501.9107, 4249.8908, 4062.7901, 3828.7728, 3686.8576] +25-08-28 07:51:15 | D | - best error = [ 4501.9107, 4249.8908, 4062.7901, 3828.7728, 3686.8576] +25-08-28 07:51:15 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:51:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:51:15 | D | - sum error = [ 3616.7637, 3611.5790, 3572.8216, 3563.2481, 3593.8139] +25-08-28 07:51:15 | D | - best error = [ 3616.7637, 3611.5790, 3572.8216, 3563.2481, 3563.2481] +25-08-28 07:51:15 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:51:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:51:15 | D | - sum error = [ 3695.0471, 3722.2961, 3949.2579, 4049.0536, 4266.4430] +25-08-28 07:51:15 | D | - best error = [ 3563.2481, 3563.2481, 3563.2481, 3563.2481, 3563.2481] +25-08-28 07:51:15 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:51:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:51:15 | D | - sum error = [ 4454.8015, 4731.9446, 4863.0943, 5102.5850, 5472.4696] +25-08-28 07:51:15 | D | - best error = [ 3563.2481, 3563.2481, 3563.2481, 3563.2481, 3563.2481] +25-08-28 07:51:15 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:51:15 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:51:15 | D | - sum error = [10145.5275, 9003.5215, 8344.0091, 7335.9948, 6870.1467] +25-08-28 07:51:15 | D | - best error = [ 3563.2481, 3563.2481, 3563.2481, 3563.2481, 3563.2481] +25-08-28 07:51:15 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:51:15 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:51:15 | D | - sum error = [ 6300.2165, 5944.2423, 5500.7994, 5251.9779, 5001.2776] +25-08-28 07:51:15 | D | - best error = [ 3563.2481, 3563.2481, 3563.2481, 3563.2481, 3563.2481] +25-08-28 07:51:15 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:51:15 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:51:15 | D | - sum error = [ 5018.1126, 4782.2392, 4735.6579, 4807.2158, 4882.6476] +25-08-28 07:51:15 | D | - best error = [ 3563.2481, 3563.2481, 3563.2481, 3563.2481, 3563.2481] +25-08-28 07:51:15 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:51:15 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:51:15 | D | - sum error = [ 4912.2121, 5144.4069, 5291.9645, 5515.1279] +25-08-28 07:51:15 | D | - best error = [ 3563.2481, 3563.2481, 3563.2481, 3563.2481] +25-08-28 07:51:15 | D | + error = 3563.2481 +25-08-28 07:51:15 | D | + scale = [min=0.2706, max=3.2127] +25-08-28 07:51:16 | D | - transformer_blocks.5.attn add_qkv_proj +25-08-28 07:51:16 | D | + w: sint4 +25-08-28 07:51:16 | D | + x: sint4 +25-08-28 07:51:16 | D | + y: None +25-08-28 07:51:16 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:51:16 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:51:16 | D | + x - AbsMax +25-08-28 07:51:16 | D | + x = [min=0.0898, max=24.5000] +25-08-28 07:51:16 | D | + w - AbsMax +25-08-28 07:51:16 | D | + w = [min=0.1118, max=0.3809] +25-08-28 07:51:16 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:51:17 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:52:21 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:52:21 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:52:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:52:21 | D | - sum error = [ 1125.6608, 1076.4148, 1007.0647, 944.3300, 900.2735] +25-08-28 07:52:21 | D | - best error = [ 1125.6608, 1076.4148, 1007.0647, 944.3300, 900.2735] +25-08-28 07:52:21 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:52:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:52:21 | D | - sum error = [ 865.5003, 830.1201, 793.0552, 768.3580, 748.1849] +25-08-28 07:52:21 | D | - best error = [ 865.5003, 830.1201, 793.0552, 768.3580, 748.1849] +25-08-28 07:52:21 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:52:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:52:21 | D | - sum error = [ 723.9462, 728.8558, 749.5761, 743.4263, 763.8539] +25-08-28 07:52:21 | D | - best error = [ 723.9462, 723.9462, 723.9462, 723.9462, 723.9462] +25-08-28 07:52:21 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:52:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:52:21 | D | - sum error = [ 789.7505, 819.4618, 877.1623, 961.8098, 1079.3960] +25-08-28 07:52:21 | D | - best error = [ 723.9462, 723.9462, 723.9462, 723.9462, 723.9462] +25-08-28 07:52:21 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:52:21 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:52:21 | D | - sum error = [ 1457.0444, 1338.1234, 1255.7137, 1213.4059, 1112.1487] +25-08-28 07:52:21 | D | - best error = [ 723.9462, 723.9462, 723.9462, 723.9462, 723.9462] +25-08-28 07:52:21 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:52:21 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:52:21 | D | - sum error = [ 1054.0579, 1007.7661, 945.2026, 898.0840, 845.7598] +25-08-28 07:52:21 | D | - best error = [ 723.9462, 723.9462, 723.9462, 723.9462, 723.9462] +25-08-28 07:52:21 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:52:21 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:52:21 | D | - sum error = [ 825.0660, 805.5602, 796.8708, 779.7625, 806.5284] +25-08-28 07:52:21 | D | - best error = [ 723.9462, 723.9462, 723.9462, 723.9462, 723.9462] +25-08-28 07:52:21 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:52:21 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:52:21 | D | - sum error = [ 835.5187, 887.8778, 956.8284, 1076.0947] +25-08-28 07:52:21 | D | - best error = [ 723.9462, 723.9462, 723.9462, 723.9462] +25-08-28 07:52:21 | D | + error = 723.9462 +25-08-28 07:52:21 | D | + scale = [min=0.2997, max=4.9497] +25-08-28 07:52:21 | D | - transformer_blocks.5.attn.out_proj + transformer_blocks.5.attn.add_out_proj +25-08-28 07:52:21 | D | + w: sint4 +25-08-28 07:52:21 | D | + x: sint4 +25-08-28 07:52:21 | D | + y: None +25-08-28 07:52:21 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:52:21 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:52:22 | D | + x - AbsMax +25-08-28 07:52:22 | D | + x = [min=0.8281, max=8.7500] +25-08-28 07:52:22 | D | + w - AbsMax +25-08-28 07:52:22 | D | + w = [min=0.1216, max=0.3066] +25-08-28 07:52:22 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:52:23 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:54:13 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:54:13 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:54:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:54:13 | D | - sum error = [ 4800.3652, 4779.5083, 4761.3244, 4764.9892, 4761.2933] +25-08-28 07:54:13 | D | - best error = [ 4800.3652, 4779.5083, 4761.3244, 4761.3244, 4761.2933] +25-08-28 07:54:13 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:54:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:54:13 | D | - sum error = [ 4765.9639, 4772.5341, 4785.0567, 4785.2225, 4798.2939] +25-08-28 07:54:13 | D | - best error = [ 4761.2933, 4761.2933, 4761.2933, 4761.2933, 4761.2933] +25-08-28 07:54:13 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:54:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:54:13 | D | - sum error = [ 4840.1302, 4875.1652, 4900.8032, 4942.7798, 4980.6506] +25-08-28 07:54:13 | D | - best error = [ 4761.2933, 4761.2933, 4761.2933, 4761.2933, 4761.2933] +25-08-28 07:54:13 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:54:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:54:13 | D | - sum error = [ 5037.9740, 5083.3265, 5150.4301, 5205.6099, 5262.9617] +25-08-28 07:54:13 | D | - best error = [ 4761.2933, 4761.2933, 4761.2933, 4761.2933, 4761.2933] +25-08-28 07:54:13 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:54:13 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:54:13 | D | - sum error = [ 5045.9778, 5004.1199, 4962.9578, 4931.2966, 4910.4193] +25-08-28 07:54:13 | D | - best error = [ 4761.2933, 4761.2933, 4761.2933, 4761.2933, 4761.2933] +25-08-28 07:54:13 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:54:13 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:54:13 | D | - sum error = [ 4888.9220, 4891.2767, 4886.4775, 4890.8627, 4887.9652] +25-08-28 07:54:13 | D | - best error = [ 4761.2933, 4761.2933, 4761.2933, 4761.2933, 4761.2933] +25-08-28 07:54:13 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:54:13 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:54:13 | D | - sum error = [ 4893.2051, 4920.0002, 4948.5535, 4995.0652, 5033.1306] +25-08-28 07:54:13 | D | - best error = [ 4761.2933, 4761.2933, 4761.2933, 4761.2933, 4761.2933] +25-08-28 07:54:13 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:54:13 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:54:13 | D | - sum error = [ 5070.5071, 5145.7699, 5202.1374, 5257.1350] +25-08-28 07:54:13 | D | - best error = [ 4761.2933, 4761.2933, 4761.2933, 4761.2933] +25-08-28 07:54:13 | D | + error = 4761.2933 +25-08-28 07:54:13 | D | + scale = [min=0.9630, max=1.5431] +25-08-28 07:54:13 | D | - transformer_blocks.5.ff.up_proj +25-08-28 07:54:13 | D | + w: sint4 +25-08-28 07:54:13 | D | + x: sint4 +25-08-28 07:54:13 | D | + y: None +25-08-28 07:54:13 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:54:13 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:54:14 | D | + x - AbsMax +25-08-28 07:54:14 | D | + x = [min=0.0659, max=22.1250] +25-08-28 07:54:14 | D | + w - AbsMax +25-08-28 07:54:14 | D | + w = [min=0.1113, max=0.6328] +25-08-28 07:54:14 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:54:15 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:55:32 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:55:32 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:55:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:55:32 | D | - sum error = [12961.6629, 12272.9837, 11681.9327, 11152.5663, 10690.1457] +25-08-28 07:55:32 | D | - best error = [12961.6629, 12272.9837, 11681.9327, 11152.5663, 10690.1457] +25-08-28 07:55:32 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:55:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:55:32 | D | - sum error = [10349.2114, 10069.6745, 9844.9918, 9692.9438, 9640.8979] +25-08-28 07:55:32 | D | - best error = [10349.2114, 10069.6745, 9844.9918, 9692.9438, 9640.8979] +25-08-28 07:55:32 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:55:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:55:32 | D | - sum error = [ 9611.5809, 9659.3888, 9790.5313, 9958.6634, 10212.6702] +25-08-28 07:55:32 | D | - best error = [ 9611.5809, 9611.5809, 9611.5809, 9611.5809, 9611.5809] +25-08-28 07:55:32 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:55:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:55:32 | D | - sum error = [10532.0949, 10940.7576, 11472.4432, 12100.5091, 12960.8901] +25-08-28 07:55:32 | D | - best error = [ 9611.5809, 9611.5809, 9611.5809, 9611.5809, 9611.5809] +25-08-28 07:55:32 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:55:32 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:55:32 | D | - sum error = [15272.5207, 14215.4069, 13304.4065, 12521.9027, 11847.6772] +25-08-28 07:55:32 | D | - best error = [ 9611.5809, 9611.5809, 9611.5809, 9611.5809, 9611.5809] +25-08-28 07:55:32 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:55:32 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:55:32 | D | - sum error = [11284.2028, 10837.1675, 10465.3168, 10197.6899, 10007.3485] +25-08-28 07:55:32 | D | - best error = [ 9611.5809, 9611.5809, 9611.5809, 9611.5809, 9611.5809] +25-08-28 07:55:32 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:55:32 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:55:32 | D | - sum error = [ 9938.9106, 9953.7229, 10042.7279, 10223.0183, 10484.3075] +25-08-28 07:55:32 | D | - best error = [ 9611.5809, 9611.5809, 9611.5809, 9611.5809, 9611.5809] +25-08-28 07:55:32 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:55:32 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:55:32 | D | - sum error = [10847.8879, 11377.9942, 11982.6635, 12862.8090] +25-08-28 07:55:32 | D | - best error = [ 9611.5809, 9611.5809, 9611.5809, 9611.5809] +25-08-28 07:55:32 | D | + error = 9611.5809 +25-08-28 07:55:32 | D | + scale = [min=0.2567, max=4.7037] +25-08-28 07:55:32 | D | - transformer_blocks.5.ff.down_proj +25-08-28 07:55:32 | D | + w: sint4 +25-08-28 07:55:32 | D | + x: uint4 +25-08-28 07:55:32 | D | + y: None +25-08-28 07:55:32 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:55:32 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:55:35 | D | + x - AbsMax +25-08-28 07:55:35 | D | + x = [min=0.1719, max=32.5000] +25-08-28 07:55:35 | D | + w - AbsMax +25-08-28 07:55:35 | D | + w = [min=0.0334, max=1.4688] +25-08-28 07:55:35 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:55:37 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:57:27 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:57:27 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:57:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:57:27 | D | - sum error = [ 8207.1023, 8029.9300, 8007.9845, 7991.8296, 8099.8516] +25-08-28 07:57:27 | D | - best error = [ 8207.1023, 8029.9300, 8007.9845, 7991.8296, 7991.8296] +25-08-28 07:57:27 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:57:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:57:27 | D | - sum error = [ 8283.0555, 8491.2694, 9093.3646, 9873.0660, 11172.7801] +25-08-28 07:57:27 | D | - best error = [ 7991.8296, 7991.8296, 7991.8296, 7991.8296, 7991.8296] +25-08-28 07:57:27 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:57:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:57:27 | D | - sum error = [13291.0785, 16516.5972, 20586.2696, 24901.9916, 28833.3551] +25-08-28 07:57:27 | D | - best error = [ 7991.8296, 7991.8296, 7991.8296, 7991.8296, 7991.8296] +25-08-28 07:57:27 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:57:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:57:27 | D | - sum error = [32683.8549, 35714.4981, 38275.6854, 40383.5135, 42403.5697] +25-08-28 07:57:27 | D | - best error = [ 7991.8296, 7991.8296, 7991.8296, 7991.8296, 7991.8296] +25-08-28 07:57:27 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:57:27 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:57:27 | D | - sum error = [10000.6916, 9500.3899, 9139.5217, 8885.6333, 8822.4500] +25-08-28 07:57:27 | D | - best error = [ 7991.8296, 7991.8296, 7991.8296, 7991.8296, 7991.8296] +25-08-28 07:57:27 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:57:27 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:57:27 | D | - sum error = [ 8746.5435, 8923.7714, 9289.1085, 9849.3468, 11117.1079] +25-08-28 07:57:27 | D | - best error = [ 7991.8296, 7991.8296, 7991.8296, 7991.8296, 7991.8296] +25-08-28 07:57:27 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:57:27 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:57:27 | D | - sum error = [13755.4190, 17830.8954, 22954.1756, 27877.8329, 31683.6473] +25-08-28 07:57:27 | D | - best error = [ 7991.8296, 7991.8296, 7991.8296, 7991.8296, 7991.8296] +25-08-28 07:57:27 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:57:27 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:57:27 | D | - sum error = [34861.8375, 37594.8456, 40066.2747, 42464.3150] +25-08-28 07:57:27 | D | - best error = [ 7991.8296, 7991.8296, 7991.8296, 7991.8296] +25-08-28 07:57:27 | D | + error = 7991.8296 +25-08-28 07:57:27 | D | + scale = [min=0.7679, max=1.6857] +25-08-28 07:57:28 | D | - transformer_blocks.5.ff_context.up_proj +25-08-28 07:57:28 | D | + w: sint4 +25-08-28 07:57:28 | D | + x: sint4 +25-08-28 07:57:28 | D | + y: None +25-08-28 07:57:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:57:28 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:57:28 | D | + x - AbsMax +25-08-28 07:57:28 | D | + x = [min=0.1621, max=20.0000] +25-08-28 07:57:28 | D | + w - AbsMax +25-08-28 07:57:28 | D | + w = [min=0.0859, max=0.5430] +25-08-28 07:57:28 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:57:28 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 07:58:12 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:58:12 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:58:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:58:12 | D | - sum error = [ 4835.3254, 4564.8584, 4306.0139, 4085.2661, 3871.6404] +25-08-28 07:58:12 | D | - best error = [ 4835.3254, 4564.8584, 4306.0139, 4085.2661, 3871.6404] +25-08-28 07:58:12 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:58:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:58:12 | D | - sum error = [ 3713.1392, 3572.2420, 3436.4853, 3326.8777, 3238.3039] +25-08-28 07:58:12 | D | - best error = [ 3713.1392, 3572.2420, 3436.4853, 3326.8777, 3238.3039] +25-08-28 07:58:12 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:58:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:58:12 | D | - sum error = [ 3175.3505, 3126.2991, 3110.0251, 3090.5864, 3079.4516] +25-08-28 07:58:12 | D | - best error = [ 3175.3505, 3126.2991, 3110.0251, 3090.5864, 3079.4516] +25-08-28 07:58:12 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:58:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:58:12 | D | - sum error = [ 3104.9196, 3125.6209, 3215.0327, 3281.3063, 3366.4189] +25-08-28 07:58:12 | D | - best error = [ 3079.4516, 3079.4516, 3079.4516, 3079.4516, 3079.4516] +25-08-28 07:58:12 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:58:12 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:58:12 | D | - sum error = [ 4124.4842, 3925.4634, 3728.4839, 3568.2059, 3420.4529] +25-08-28 07:58:12 | D | - best error = [ 3079.4516, 3079.4516, 3079.4516, 3079.4516, 3079.4516] +25-08-28 07:58:12 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:58:12 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:58:12 | D | - sum error = [ 3311.4473, 3221.3021, 3136.9973, 3077.7378, 3026.3917] +25-08-28 07:58:12 | D | - best error = [ 3079.4516, 3079.4516, 3079.4516, 3077.7378, 3026.3917] +25-08-28 07:58:12 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:58:12 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:58:12 | D | - sum error = [ 3007.2842, 2991.6678, 2997.6931, 3018.3840, 3062.0738] +25-08-28 07:58:12 | D | - best error = [ 3007.2842, 2991.6678, 2991.6678, 2991.6678, 2991.6678] +25-08-28 07:58:12 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:58:12 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:58:12 | D | - sum error = [ 3101.1383, 3173.7713, 3265.8585, 3352.2413] +25-08-28 07:58:12 | D | - best error = [ 2991.6678, 2991.6678, 2991.6678, 2991.6678] +25-08-28 07:58:12 | D | + error = 2991.6678 +25-08-28 07:58:12 | D | + scale = [min=0.6497, max=14.6760] +25-08-28 07:58:12 | D | - transformer_blocks.5.ff_context.down_proj +25-08-28 07:58:12 | D | + w: sint4 +25-08-28 07:58:12 | D | + x: uint4 +25-08-28 07:58:12 | D | + y: None +25-08-28 07:58:12 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:58:12 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:58:13 | D | + x - AbsMax +25-08-28 07:58:13 | D | + x = [min=0.1719, max=28.0000] +25-08-28 07:58:13 | D | + w - AbsMax +25-08-28 07:58:13 | D | + w = [min=0.0244, max=0.5547] +25-08-28 07:58:13 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 07:58:13 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 07:59:02 | D | - x / w range = AbsMax / AbsMax +25-08-28 07:59:02 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 07:59:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:59:02 | D | - sum error = [ 1509.1479, 1562.7105, 1649.6863, 1775.0806, 2052.7952] +25-08-28 07:59:02 | D | - best error = [ 1509.1479, 1509.1479, 1509.1479, 1509.1479, 1509.1479] +25-08-28 07:59:02 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 07:59:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:59:02 | D | - sum error = [ 2602.4279, 3520.4238, 4900.8940, 6654.2639, 8932.6722] +25-08-28 07:59:02 | D | - best error = [ 1509.1479, 1509.1479, 1509.1479, 1509.1479, 1509.1479] +25-08-28 07:59:02 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 07:59:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:59:02 | D | - sum error = [11449.5527, 14358.7325, 17642.6175, 21061.0435, 24332.8663] +25-08-28 07:59:02 | D | - best error = [ 1509.1479, 1509.1479, 1509.1479, 1509.1479, 1509.1479] +25-08-28 07:59:02 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:59:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 07:59:02 | D | - sum error = [27283.8332, 29720.4781, 31474.7248, 32809.1753, 33944.5345] +25-08-28 07:59:02 | D | - best error = [ 1509.1479, 1509.1479, 1509.1479, 1509.1479, 1509.1479] +25-08-28 07:59:02 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 07:59:02 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 07:59:02 | D | - sum error = [ 1487.0012, 1483.6450, 1514.8871, 1625.9506, 1777.1703] +25-08-28 07:59:02 | D | - best error = [ 1487.0012, 1483.6450, 1483.6450, 1483.6450, 1483.6450] +25-08-28 07:59:02 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 07:59:02 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 07:59:02 | D | - sum error = [ 1988.4411, 2215.5979, 2641.6066, 3420.1889, 5153.8758] +25-08-28 07:59:02 | D | - best error = [ 1483.6450, 1483.6450, 1483.6450, 1483.6450, 1483.6450] +25-08-28 07:59:02 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 07:59:02 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 07:59:02 | D | - sum error = [ 8440.2967, 12961.8079, 17886.1573, 22494.0696, 26381.3571] +25-08-28 07:59:02 | D | - best error = [ 1483.6450, 1483.6450, 1483.6450, 1483.6450, 1483.6450] +25-08-28 07:59:02 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 07:59:02 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 07:59:02 | D | - sum error = [29232.0383, 31419.0149, 32858.5176, 33986.0602] +25-08-28 07:59:02 | D | - best error = [ 1483.6450, 1483.6450, 1483.6450, 1483.6450] +25-08-28 07:59:02 | D | + error = 1483.6450 +25-08-28 07:59:02 | D | + scale = [min=1.6399, max=23.6944] +25-08-28 07:59:22 | D | - Smoothing Diffusion Block transformer_blocks.6 +25-08-28 07:59:22 | D | - Skipping Module transformer_blocks.6.norm1.linear +25-08-28 07:59:22 | D | - Skipping Module transformer_blocks.6.norm1_context.linear +25-08-28 07:59:22 | D | - Smoothing Transformer Block transformer_blocks.6 +25-08-28 07:59:22 | D | - transformer_blocks.6.attn.qkv_proj +25-08-28 07:59:22 | D | + w: sint4 +25-08-28 07:59:22 | D | + x: sint4 +25-08-28 07:59:22 | D | + y: None +25-08-28 07:59:22 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 07:59:22 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 07:59:22 | D | + x - AbsMax +25-08-28 07:59:22 | D | + x = [min=0.0444, max=16.8750] +25-08-28 07:59:22 | D | + w - AbsMax +25-08-28 07:59:22 | D | + w = [min=0.1001, max=0.5664] +25-08-28 07:59:22 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 07:59:23 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:00:35 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:00:35 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:00:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:00:35 | D | - sum error = [ 3870.4028, 3705.7639, 3535.5776, 3425.2920, 3407.2319] +25-08-28 08:00:35 | D | - best error = [ 3870.4028, 3705.7639, 3535.5776, 3425.2920, 3407.2319] +25-08-28 08:00:35 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:00:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:00:35 | D | - sum error = [ 3387.2605, 3437.1670, 3400.3132, 3441.6188, 3512.1735] +25-08-28 08:00:35 | D | - best error = [ 3387.2605, 3387.2605, 3387.2605, 3387.2605, 3387.2605] +25-08-28 08:00:35 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:00:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:00:35 | D | - sum error = [ 3588.9909, 3762.5586, 3894.6825, 4118.0248, 4353.1040] +25-08-28 08:00:35 | D | - best error = [ 3387.2605, 3387.2605, 3387.2605, 3387.2605, 3387.2605] +25-08-28 08:00:35 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:00:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:00:35 | D | - sum error = [ 4620.8127, 4820.7340, 5032.3482, 5270.6275, 5552.7669] +25-08-28 08:00:35 | D | - best error = [ 3387.2605, 3387.2605, 3387.2605, 3387.2605, 3387.2605] +25-08-28 08:00:35 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:00:35 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:00:35 | D | - sum error = [ 7884.3252, 7270.0575, 6623.0262, 6097.8399, 5718.1065] +25-08-28 08:00:35 | D | - best error = [ 3387.2605, 3387.2605, 3387.2605, 3387.2605, 3387.2605] +25-08-28 08:00:35 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:00:35 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:00:35 | D | - sum error = [ 5523.5706, 5434.4864, 5176.0753, 5038.0728, 4980.6184] +25-08-28 08:00:35 | D | - best error = [ 3387.2605, 3387.2605, 3387.2605, 3387.2605, 3387.2605] +25-08-28 08:00:35 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:00:35 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:00:35 | D | - sum error = [ 4777.3545, 4745.0764, 4748.5785, 4786.2553, 4995.8371] +25-08-28 08:00:35 | D | - best error = [ 3387.2605, 3387.2605, 3387.2605, 3387.2605, 3387.2605] +25-08-28 08:00:35 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:00:35 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:00:35 | D | - sum error = [ 5099.9422, 5148.5042, 5330.2885, 5639.8848] +25-08-28 08:00:35 | D | - best error = [ 3387.2605, 3387.2605, 3387.2605, 3387.2605] +25-08-28 08:00:35 | D | + error = 3387.2605 +25-08-28 08:00:35 | D | + scale = [min=0.4591, max=2.0268] +25-08-28 08:00:35 | D | - transformer_blocks.6.attn add_qkv_proj +25-08-28 08:00:35 | D | + w: sint4 +25-08-28 08:00:35 | D | + x: sint4 +25-08-28 08:00:35 | D | + y: None +25-08-28 08:00:35 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:00:35 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:00:35 | D | + x - AbsMax +25-08-28 08:00:35 | D | + x = [min=0.1289, max=19.2500] +25-08-28 08:00:35 | D | + w - AbsMax +25-08-28 08:00:35 | D | + w = [min=0.1143, max=0.5195] +25-08-28 08:00:35 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:00:36 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:01:40 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:01:40 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:01:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:01:40 | D | - sum error = [ 1772.6313, 1701.3121, 1646.8133, 1615.7944, 1500.7003] +25-08-28 08:01:40 | D | - best error = [ 1772.6313, 1701.3121, 1646.8133, 1615.7944, 1500.7003] +25-08-28 08:01:40 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:01:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:01:40 | D | - sum error = [ 1487.0173, 1416.7449, 1374.9109, 1331.2173, 1304.5309] +25-08-28 08:01:40 | D | - best error = [ 1487.0173, 1416.7449, 1374.9109, 1331.2173, 1304.5309] +25-08-28 08:01:40 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:01:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:01:40 | D | - sum error = [ 1282.2840, 1283.9867, 1281.3335, 1308.5006, 1412.3623] +25-08-28 08:01:40 | D | - best error = [ 1282.2840, 1282.2840, 1281.3335, 1281.3335, 1281.3335] +25-08-28 08:01:40 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:01:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:01:40 | D | - sum error = [ 1422.6172, 1465.4928, 1570.8397, 1620.0657, 1698.4194] +25-08-28 08:01:40 | D | - best error = [ 1281.3335, 1281.3335, 1281.3335, 1281.3335, 1281.3335] +25-08-28 08:01:40 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:01:40 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:01:40 | D | - sum error = [ 2130.5368, 1958.6547, 1892.3937, 1796.7290, 1737.3664] +25-08-28 08:01:40 | D | - best error = [ 1281.3335, 1281.3335, 1281.3335, 1281.3335, 1281.3335] +25-08-28 08:01:40 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:01:40 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:01:40 | D | - sum error = [ 1640.8100, 1548.8827, 1501.6737, 1448.5051, 1399.6707] +25-08-28 08:01:40 | D | - best error = [ 1281.3335, 1281.3335, 1281.3335, 1281.3335, 1281.3335] +25-08-28 08:01:40 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:01:40 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:01:40 | D | - sum error = [ 1358.0692, 1344.8782, 1377.9670, 1391.5130, 1449.4868] +25-08-28 08:01:40 | D | - best error = [ 1281.3335, 1281.3335, 1281.3335, 1281.3335, 1281.3335] +25-08-28 08:01:40 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:01:40 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:01:40 | D | - sum error = [ 1450.5069, 1504.6750, 1611.1373, 1703.5418] +25-08-28 08:01:40 | D | - best error = [ 1281.3335, 1281.3335, 1281.3335, 1281.3335] +25-08-28 08:01:40 | D | + error = 1281.3335 +25-08-28 08:01:40 | D | + scale = [min=0.2925, max=5.8974] +25-08-28 08:01:40 | D | - transformer_blocks.6.attn.out_proj + transformer_blocks.6.attn.add_out_proj +25-08-28 08:01:40 | D | + w: sint4 +25-08-28 08:01:40 | D | + x: sint4 +25-08-28 08:01:40 | D | + y: None +25-08-28 08:01:40 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:01:40 | D | + finished parsing calibration arguments, ram usage: 17.5 +25-08-28 08:01:41 | D | + x - AbsMax +25-08-28 08:01:41 | D | + x = [min=0.9883, max=12.0000] +25-08-28 08:01:41 | D | + w - AbsMax +25-08-28 08:01:41 | D | + w = [min=0.1226, max=0.4727] +25-08-28 08:01:41 | D | + finished resetting calibrator, ram usage: 17.6 +25-08-28 08:01:42 | D | + finished calculating the original outputs, ram usage: 17.8 +25-08-28 08:03:27 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:03:27 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:03:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:03:27 | D | - sum error = [ 4611.8933, 4588.9408, 4562.9907, 4556.8564, 4537.5628] +25-08-28 08:03:27 | D | - best error = [ 4611.8933, 4588.9408, 4562.9907, 4556.8564, 4537.5628] +25-08-28 08:03:27 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:03:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:03:27 | D | - sum error = [ 4541.1532, 4554.7200, 4563.4551, 4567.8560, 4600.6656] +25-08-28 08:03:27 | D | - best error = [ 4537.5628, 4537.5628, 4537.5628, 4537.5628, 4537.5628] +25-08-28 08:03:27 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:03:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:03:27 | D | - sum error = [ 4629.2917, 4670.8224, 4729.6896, 4778.6165, 4841.9407] +25-08-28 08:03:27 | D | - best error = [ 4537.5628, 4537.5628, 4537.5628, 4537.5628, 4537.5628] +25-08-28 08:03:27 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:03:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:03:27 | D | - sum error = [ 4931.2153, 5004.8175, 5093.5968, 5196.2900, 5283.5937] +25-08-28 08:03:27 | D | - best error = [ 4537.5628, 4537.5628, 4537.5628, 4537.5628, 4537.5628] +25-08-28 08:03:27 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:03:27 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:03:27 | D | - sum error = [ 4990.1687, 4906.0868, 4836.9856, 4799.8390, 4753.7835] +25-08-28 08:03:27 | D | - best error = [ 4537.5628, 4537.5628, 4537.5628, 4537.5628, 4537.5628] +25-08-28 08:03:27 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:03:27 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:03:27 | D | - sum error = [ 4713.6065, 4718.4936, 4686.6244, 4704.6562, 4711.4526] +25-08-28 08:03:27 | D | - best error = [ 4537.5628, 4537.5628, 4537.5628, 4537.5628, 4537.5628] +25-08-28 08:03:27 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:03:27 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:03:27 | D | - sum error = [ 4724.5660, 4743.5364, 4804.5537, 4843.4087, 4915.3824] +25-08-28 08:03:27 | D | - best error = [ 4537.5628, 4537.5628, 4537.5628, 4537.5628, 4537.5628] +25-08-28 08:03:27 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:03:27 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:03:27 | D | - sum error = [ 4984.8916, 5079.6569, 5175.5690, 5284.1062] +25-08-28 08:03:27 | D | - best error = [ 4537.5628, 4537.5628, 4537.5628, 4537.5628] +25-08-28 08:03:27 | D | + error = 4537.5628 +25-08-28 08:03:27 | D | + scale = [min=0.9976, max=1.6438] +25-08-28 08:03:27 | D | - transformer_blocks.6.ff.up_proj +25-08-28 08:03:27 | D | + w: sint4 +25-08-28 08:03:27 | D | + x: sint4 +25-08-28 08:03:27 | D | + y: None +25-08-28 08:03:27 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:03:27 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:03:28 | D | + x - AbsMax +25-08-28 08:03:28 | D | + x = [min=0.0532, max=8.0000] +25-08-28 08:03:28 | D | + w - AbsMax +25-08-28 08:03:28 | D | + w = [min=0.1089, max=0.5156] +25-08-28 08:03:28 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:03:29 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:04:46 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:04:46 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:04:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:04:46 | D | - sum error = [12449.4003, 12021.0455, 11656.8407, 11325.5990, 11044.6443] +25-08-28 08:04:46 | D | - best error = [12449.4003, 12021.0455, 11656.8407, 11325.5990, 11044.6443] +25-08-28 08:04:46 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:04:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:04:46 | D | - sum error = [10783.0905, 10589.2587, 10427.1964, 10308.2701, 10236.5025] +25-08-28 08:04:46 | D | - best error = [10783.0905, 10589.2587, 10427.1964, 10308.2701, 10236.5025] +25-08-28 08:04:46 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:04:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:04:46 | D | - sum error = [10191.7335, 10196.4457, 10254.4012, 10326.1656, 10441.6221] +25-08-28 08:04:46 | D | - best error = [10191.7335, 10191.7335, 10191.7335, 10191.7335, 10191.7335] +25-08-28 08:04:46 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:04:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:04:46 | D | - sum error = [10619.9119, 10840.7738, 11121.7801, 11494.1905, 11982.2275] +25-08-28 08:04:46 | D | - best error = [10191.7335, 10191.7335, 10191.7335, 10191.7335, 10191.7335] +25-08-28 08:04:46 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:04:46 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:04:46 | D | - sum error = [13341.4860, 12792.0775, 12290.2589, 11870.9036, 11492.1274] +25-08-28 08:04:46 | D | - best error = [10191.7335, 10191.7335, 10191.7335, 10191.7335, 10191.7335] +25-08-28 08:04:46 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:04:46 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:04:46 | D | - sum error = [11174.3692, 10898.9012, 10689.3375, 10543.2124, 10420.5973] +25-08-28 08:04:46 | D | - best error = [10191.7335, 10191.7335, 10191.7335, 10191.7335, 10191.7335] +25-08-28 08:04:46 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:04:46 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:04:46 | D | - sum error = [10343.2072, 10341.5156, 10380.4878, 10472.7670, 10623.2089] +25-08-28 08:04:46 | D | - best error = [10191.7335, 10191.7335, 10191.7335, 10191.7335, 10191.7335] +25-08-28 08:04:46 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:04:46 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:04:46 | D | - sum error = [10806.0794, 11083.6734, 11443.5236, 11935.0013] +25-08-28 08:04:46 | D | - best error = [10191.7335, 10191.7335, 10191.7335, 10191.7335] +25-08-28 08:04:46 | D | + error = 10191.7335 +25-08-28 08:04:46 | D | + scale = [min=0.2307, max=2.8284] +25-08-28 08:04:46 | D | - transformer_blocks.6.ff.down_proj +25-08-28 08:04:46 | D | + w: sint4 +25-08-28 08:04:46 | D | + x: uint4 +25-08-28 08:04:46 | D | + y: None +25-08-28 08:04:46 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:04:46 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:04:49 | D | + x - AbsMax +25-08-28 08:04:49 | D | + x = [min=0.1719, max=37.0000] +25-08-28 08:04:49 | D | + w - AbsMax +25-08-28 08:04:49 | D | + w = [min=0.0554, max=0.8047] +25-08-28 08:04:49 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:04:51 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:06:41 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:06:41 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:06:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:06:41 | D | - sum error = [ 7987.2887, 7951.0983, 7916.3317, 7938.5638, 7973.1497] +25-08-28 08:06:41 | D | - best error = [ 7987.2887, 7951.0983, 7916.3317, 7916.3317, 7916.3317] +25-08-28 08:06:41 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:06:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:06:41 | D | - sum error = [ 8065.6784, 8192.6260, 8394.0274, 8695.0754, 9109.7943] +25-08-28 08:06:41 | D | - best error = [ 7916.3317, 7916.3317, 7916.3317, 7916.3317, 7916.3317] +25-08-28 08:06:41 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:06:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:06:41 | D | - sum error = [ 9768.6883, 10900.1123, 12674.6222, 14727.7404, 17121.6887] +25-08-28 08:06:41 | D | - best error = [ 7916.3317, 7916.3317, 7916.3317, 7916.3317, 7916.3317] +25-08-28 08:06:41 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:06:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:06:41 | D | - sum error = [19295.6808, 21990.7493, 25239.9949, 27254.5747, 29426.3871] +25-08-28 08:06:41 | D | - best error = [ 7916.3317, 7916.3317, 7916.3317, 7916.3317, 7916.3317] +25-08-28 08:06:41 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:06:41 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:06:41 | D | - sum error = [ 9172.6799, 8937.4815, 8746.3666, 8634.3347, 8511.6950] +25-08-28 08:06:41 | D | - best error = [ 7916.3317, 7916.3317, 7916.3317, 7916.3317, 7916.3317] +25-08-28 08:06:41 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:06:41 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:06:41 | D | - sum error = [ 8510.3772, 8530.3688, 8648.5407, 8915.5684, 9329.9183] +25-08-28 08:06:41 | D | - best error = [ 7916.3317, 7916.3317, 7916.3317, 7916.3317, 7916.3317] +25-08-28 08:06:41 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:06:41 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:06:41 | D | - sum error = [10157.3788, 11698.3260, 13583.1366, 16111.8014, 18506.5887] +25-08-28 08:06:41 | D | - best error = [ 7916.3317, 7916.3317, 7916.3317, 7916.3317, 7916.3317] +25-08-28 08:06:41 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:06:41 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:06:41 | D | - sum error = [21328.4156, 24616.7683, 27079.2154, 29261.8840] +25-08-28 08:06:41 | D | - best error = [ 7916.3317, 7916.3317, 7916.3317, 7916.3317] +25-08-28 08:06:41 | D | + error = 7916.3317 +25-08-28 08:06:41 | D | + scale = [min=0.8385, max=1.4349] +25-08-28 08:06:41 | D | - transformer_blocks.6.ff_context.up_proj +25-08-28 08:06:41 | D | + w: sint4 +25-08-28 08:06:41 | D | + x: sint4 +25-08-28 08:06:41 | D | + y: None +25-08-28 08:06:41 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:06:41 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:06:41 | D | + x - AbsMax +25-08-28 08:06:41 | D | + x = [min=0.1338, max=14.7500] +25-08-28 08:06:41 | D | + w - AbsMax +25-08-28 08:06:41 | D | + w = [min=0.0820, max=0.6172] +25-08-28 08:06:41 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:06:42 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:07:26 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:07:26 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:07:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:07:26 | D | - sum error = [ 5025.9646, 4804.7237, 4595.6603, 4425.6529, 4254.7341] +25-08-28 08:07:26 | D | - best error = [ 5025.9646, 4804.7237, 4595.6603, 4425.6529, 4254.7341] +25-08-28 08:07:26 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:07:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:07:26 | D | - sum error = [ 4103.5451, 3981.6139, 3886.9532, 3788.1983, 3722.2889] +25-08-28 08:07:26 | D | - best error = [ 4103.5451, 3981.6139, 3886.9532, 3788.1983, 3722.2889] +25-08-28 08:07:26 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:07:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:07:26 | D | - sum error = [ 3664.6400, 3636.7088, 3579.1565, 3559.9143, 3588.8257] +25-08-28 08:07:26 | D | - best error = [ 3664.6400, 3636.7088, 3579.1565, 3559.9143, 3559.9143] +25-08-28 08:07:26 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:07:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:07:26 | D | - sum error = [ 3595.2786, 3641.3285, 3700.9746, 3774.8771, 3874.6996] +25-08-28 08:07:26 | D | - best error = [ 3559.9143, 3559.9143, 3559.9143, 3559.9143, 3559.9143] +25-08-28 08:07:26 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:07:26 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:07:26 | D | - sum error = [ 4281.3330, 4084.1989, 3940.2032, 3817.7962, 3719.4639] +25-08-28 08:07:26 | D | - best error = [ 3559.9143, 3559.9143, 3559.9143, 3559.9143, 3559.9143] +25-08-28 08:07:26 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:07:26 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:07:26 | D | - sum error = [ 3621.6488, 3552.1050, 3505.8729, 3459.3447, 3429.4556] +25-08-28 08:07:26 | D | - best error = [ 3559.9143, 3552.1050, 3505.8729, 3459.3447, 3429.4556] +25-08-28 08:07:26 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:07:26 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:07:26 | D | - sum error = [ 3427.4800, 3435.0341, 3437.8886, 3476.6357, 3508.8611] +25-08-28 08:07:26 | D | - best error = [ 3427.4800, 3427.4800, 3427.4800, 3427.4800, 3427.4800] +25-08-28 08:07:26 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:07:26 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:07:26 | D | - sum error = [ 3574.1174, 3647.0691, 3755.1181, 3873.6773] +25-08-28 08:07:26 | D | - best error = [ 3427.4800, 3427.4800, 3427.4800, 3427.4800] +25-08-28 08:07:26 | D | + error = 3427.4800 +25-08-28 08:07:26 | D | + scale = [min=0.7972, max=10.7923] +25-08-28 08:07:26 | D | - transformer_blocks.6.ff_context.down_proj +25-08-28 08:07:26 | D | + w: sint4 +25-08-28 08:07:26 | D | + x: uint4 +25-08-28 08:07:26 | D | + y: None +25-08-28 08:07:26 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:07:26 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:07:26 | D | + x - AbsMax +25-08-28 08:07:26 | D | + x = [min=0.1719, max=27.6250] +25-08-28 08:07:26 | D | + w - AbsMax +25-08-28 08:07:26 | D | + w = [min=0.0215, max=0.5273] +25-08-28 08:07:26 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:07:27 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:08:15 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:08:15 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:08:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:08:15 | D | - sum error = [ 1691.7897, 1729.5989, 1800.2505, 2028.8278, 2456.0230] +25-08-28 08:08:15 | D | - best error = [ 1691.7897, 1691.7897, 1691.7897, 1691.7897, 1691.7897] +25-08-28 08:08:15 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:08:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:08:15 | D | - sum error = [ 3206.4763, 4199.0425, 5284.6814, 6561.9899, 7881.4171] +25-08-28 08:08:15 | D | - best error = [ 1691.7897, 1691.7897, 1691.7897, 1691.7897, 1691.7897] +25-08-28 08:08:15 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:08:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:08:15 | D | - sum error = [ 9185.2996, 10536.3671, 11898.4833, 13219.4378, 14695.0865] +25-08-28 08:08:15 | D | - best error = [ 1691.7897, 1691.7897, 1691.7897, 1691.7897, 1691.7897] +25-08-28 08:08:15 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:08:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:08:15 | D | - sum error = [16121.9731, 17417.0684, 18565.7586, 19289.2078, 20232.0403] +25-08-28 08:08:15 | D | - best error = [ 1691.7897, 1691.7897, 1691.7897, 1691.7897, 1691.7897] +25-08-28 08:08:15 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:08:15 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:08:15 | D | - sum error = [ 1637.9278, 1631.7708, 1661.1023, 1765.4519, 1908.1626] +25-08-28 08:08:15 | D | - best error = [ 1637.9278, 1631.7708, 1631.7708, 1631.7708, 1631.7708] +25-08-28 08:08:15 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:08:15 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:08:15 | D | - sum error = [ 2103.9642, 2328.4802, 2760.8937, 3528.2069, 5189.6389] +25-08-28 08:08:15 | D | - best error = [ 1631.7708, 1631.7708, 1631.7708, 1631.7708, 1631.7708] +25-08-28 08:08:15 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:08:15 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:08:15 | D | - sum error = [ 7391.5211, 9967.6561, 12247.8093, 14166.7696, 15949.3248] +25-08-28 08:08:15 | D | - best error = [ 1631.7708, 1631.7708, 1631.7708, 1631.7708, 1631.7708] +25-08-28 08:08:15 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:08:15 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:08:15 | D | - sum error = [17440.8770, 18493.0517, 19235.6586, 20238.4916] +25-08-28 08:08:15 | D | - best error = [ 1631.7708, 1631.7708, 1631.7708, 1631.7708] +25-08-28 08:08:15 | D | + error = 1631.7708 +25-08-28 08:08:15 | D | + scale = [min=1.7231, max=26.5834] +25-08-28 08:08:35 | D | - Smoothing Diffusion Block transformer_blocks.7 +25-08-28 08:08:35 | D | - Skipping Module transformer_blocks.7.norm1.linear +25-08-28 08:08:35 | D | - Skipping Module transformer_blocks.7.norm1_context.linear +25-08-28 08:08:35 | D | - Smoothing Transformer Block transformer_blocks.7 +25-08-28 08:08:35 | D | - transformer_blocks.7.attn.qkv_proj +25-08-28 08:08:35 | D | + w: sint4 +25-08-28 08:08:35 | D | + x: sint4 +25-08-28 08:08:35 | D | + y: None +25-08-28 08:08:35 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:08:35 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:08:36 | D | + x - AbsMax +25-08-28 08:08:36 | D | + x = [min=0.0457, max=17.2500] +25-08-28 08:08:36 | D | + w - AbsMax +25-08-28 08:08:36 | D | + w = [min=0.1138, max=0.8438] +25-08-28 08:08:36 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:08:37 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:09:50 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:09:50 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:09:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:09:50 | D | - sum error = [ 5645.4336, 5444.8174, 5167.2087, 4989.2332, 5007.9942] +25-08-28 08:09:50 | D | - best error = [ 5645.4336, 5444.8174, 5167.2087, 4989.2332, 4989.2332] +25-08-28 08:09:50 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:09:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:09:50 | D | - sum error = [ 4876.6786, 4863.6213, 4870.8201, 4847.3277, 4853.5985] +25-08-28 08:09:50 | D | - best error = [ 4876.6786, 4863.6213, 4863.6213, 4847.3277, 4847.3277] +25-08-28 08:09:50 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:09:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:09:50 | D | - sum error = [ 4943.7793, 4933.2725, 5060.8005, 5258.6481, 5354.1378] +25-08-28 08:09:50 | D | - best error = [ 4847.3277, 4847.3277, 4847.3277, 4847.3277, 4847.3277] +25-08-28 08:09:50 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:09:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:09:50 | D | - sum error = [ 5562.7432, 5734.7488, 6041.5859, 6376.7594, 6365.2544] +25-08-28 08:09:50 | D | - best error = [ 4847.3277, 4847.3277, 4847.3277, 4847.3277, 4847.3277] +25-08-28 08:09:50 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:09:50 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:09:50 | D | - sum error = [16837.2387, 14593.2050, 13319.2270, 12132.0857, 10845.7085] +25-08-28 08:09:50 | D | - best error = [ 4847.3277, 4847.3277, 4847.3277, 4847.3277, 4847.3277] +25-08-28 08:09:50 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:09:50 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:09:50 | D | - sum error = [ 9950.7158, 9160.8043, 8136.6367, 7738.2910, 7322.8363] +25-08-28 08:09:50 | D | - best error = [ 4847.3277, 4847.3277, 4847.3277, 4847.3277, 4847.3277] +25-08-28 08:09:50 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:09:50 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:09:50 | D | - sum error = [ 7024.9868, 6899.5887, 6476.7177, 6515.2163, 6355.6155] +25-08-28 08:09:50 | D | - best error = [ 4847.3277, 4847.3277, 4847.3277, 4847.3277, 4847.3277] +25-08-28 08:09:50 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:09:50 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:09:50 | D | - sum error = [ 6464.8842, 6355.7495, 6550.9083, 6559.5387] +25-08-28 08:09:50 | D | - best error = [ 4847.3277, 4847.3277, 4847.3277, 4847.3277] +25-08-28 08:09:50 | D | + error = 4847.3277 +25-08-28 08:09:50 | D | + scale = [min=0.2909, max=3.1240] +25-08-28 08:09:51 | D | - transformer_blocks.7.attn add_qkv_proj +25-08-28 08:09:51 | D | + w: sint4 +25-08-28 08:09:51 | D | + x: sint4 +25-08-28 08:09:51 | D | + y: None +25-08-28 08:09:51 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:09:51 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:09:51 | D | + x - AbsMax +25-08-28 08:09:51 | D | + x = [min=0.1147, max=29.0000] +25-08-28 08:09:51 | D | + w - AbsMax +25-08-28 08:09:51 | D | + w = [min=0.1069, max=0.4434] +25-08-28 08:09:51 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:09:52 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:10:57 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:10:57 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:10:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:10:57 | D | - sum error = [ 1631.6647, 1558.2298, 1496.5055, 1446.3985, 1353.3144] +25-08-28 08:10:57 | D | - best error = [ 1631.6647, 1558.2298, 1496.5055, 1446.3985, 1353.3144] +25-08-28 08:10:57 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:10:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:10:57 | D | - sum error = [ 1268.6304, 1199.5911, 1174.0918, 1109.3376, 1081.9289] +25-08-28 08:10:57 | D | - best error = [ 1268.6304, 1199.5911, 1174.0918, 1109.3376, 1081.9289] +25-08-28 08:10:57 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:10:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:10:57 | D | - sum error = [ 1065.7328, 1037.4983, 1037.8262, 1042.6954, 1047.9221] +25-08-28 08:10:57 | D | - best error = [ 1065.7328, 1037.4983, 1037.4983, 1037.4983, 1037.4983] +25-08-28 08:10:57 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:10:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:10:57 | D | - sum error = [ 1084.6911, 1143.1013, 1182.2323, 1273.4645, 1360.3315] +25-08-28 08:10:57 | D | - best error = [ 1037.4983, 1037.4983, 1037.4983, 1037.4983, 1037.4983] +25-08-28 08:10:57 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:10:57 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:10:57 | D | - sum error = [ 2160.5852, 1992.3507, 1853.6778, 1744.9725, 1623.9246] +25-08-28 08:10:57 | D | - best error = [ 1037.4983, 1037.4983, 1037.4983, 1037.4983, 1037.4983] +25-08-28 08:10:57 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:10:57 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:10:57 | D | - sum error = [ 1509.3426, 1413.3141, 1336.2086, 1248.1025, 1196.8072] +25-08-28 08:10:57 | D | - best error = [ 1037.4983, 1037.4983, 1037.4983, 1037.4983, 1037.4983] +25-08-28 08:10:57 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:10:57 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:10:57 | D | - sum error = [ 1145.3233, 1114.2316, 1080.1825, 1078.8752, 1114.9148] +25-08-28 08:10:57 | D | - best error = [ 1037.4983, 1037.4983, 1037.4983, 1037.4983, 1037.4983] +25-08-28 08:10:57 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:10:57 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:10:57 | D | - sum error = [ 1128.6480, 1191.8402, 1262.5506, 1355.1707] +25-08-28 08:10:57 | D | - best error = [ 1037.4983, 1037.4983, 1037.4983, 1037.4983] +25-08-28 08:10:57 | D | + error = 1037.4983 +25-08-28 08:10:57 | D | + scale = [min=0.3040, max=6.3726] +25-08-28 08:10:57 | D | - transformer_blocks.7.attn.out_proj + transformer_blocks.7.attn.add_out_proj +25-08-28 08:10:57 | D | + w: sint4 +25-08-28 08:10:57 | D | + x: sint4 +25-08-28 08:10:57 | D | + y: None +25-08-28 08:10:57 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:10:57 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:10:57 | D | + x - AbsMax +25-08-28 08:10:57 | D | + x = [min=0.8516, max=10.5000] +25-08-28 08:10:57 | D | + w - AbsMax +25-08-28 08:10:57 | D | + w = [min=0.1235, max=0.3750] +25-08-28 08:10:57 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:10:58 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:12:44 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:12:44 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:12:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:12:44 | D | - sum error = [ 4835.2125, 4836.8945, 4815.4566, 4815.1060, 4838.2086] +25-08-28 08:12:44 | D | - best error = [ 4835.2125, 4835.2125, 4815.4566, 4815.1060, 4815.1060] +25-08-28 08:12:44 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:12:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:12:44 | D | - sum error = [ 4848.1405, 4846.9846, 4881.6450, 4924.2924, 4959.2510] +25-08-28 08:12:44 | D | - best error = [ 4815.1060, 4815.1060, 4815.1060, 4815.1060, 4815.1060] +25-08-28 08:12:44 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:12:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:12:44 | D | - sum error = [ 5003.0335, 5048.2504, 5089.9870, 5145.9328, 5202.1892] +25-08-28 08:12:44 | D | - best error = [ 4815.1060, 4815.1060, 4815.1060, 4815.1060, 4815.1060] +25-08-28 08:12:44 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:12:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:12:44 | D | - sum error = [ 5265.4022, 5344.4880, 5398.6308, 5492.8754, 5597.6602] +25-08-28 08:12:44 | D | - best error = [ 4815.1060, 4815.1060, 4815.1060, 4815.1060, 4815.1060] +25-08-28 08:12:44 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:12:44 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:12:44 | D | - sum error = [ 5148.3265, 5102.7310, 5076.8900, 5043.9768, 5029.4340] +25-08-28 08:12:44 | D | - best error = [ 4815.1060, 4815.1060, 4815.1060, 4815.1060, 4815.1060] +25-08-28 08:12:44 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:12:44 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:12:44 | D | - sum error = [ 5007.6573, 5013.1675, 5033.0118, 5053.9788, 5049.5568] +25-08-28 08:12:44 | D | - best error = [ 4815.1060, 4815.1060, 4815.1060, 4815.1060, 4815.1060] +25-08-28 08:12:44 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:12:44 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:12:44 | D | - sum error = [ 5086.0991, 5120.0951, 5161.8671, 5204.0603, 5261.8883] +25-08-28 08:12:44 | D | - best error = [ 4815.1060, 4815.1060, 4815.1060, 4815.1060, 4815.1060] +25-08-28 08:12:44 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:12:44 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:12:44 | D | - sum error = [ 5327.3928, 5408.2402, 5485.5499, 5594.8637] +25-08-28 08:12:44 | D | - best error = [ 4815.1060, 4815.1060, 4815.1060, 4815.1060] +25-08-28 08:12:44 | D | + error = 4815.1060 +25-08-28 08:12:44 | D | + scale = [min=0.9762, max=1.4229] +25-08-28 08:12:44 | D | - transformer_blocks.7.ff.up_proj +25-08-28 08:12:44 | D | + w: sint4 +25-08-28 08:12:44 | D | + x: sint4 +25-08-28 08:12:44 | D | + y: None +25-08-28 08:12:44 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:12:44 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:12:45 | D | + x - AbsMax +25-08-28 08:12:45 | D | + x = [min=0.0747, max=14.9375] +25-08-28 08:12:45 | D | + w - AbsMax +25-08-28 08:12:45 | D | + w = [min=0.1094, max=0.4375] +25-08-28 08:12:45 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:12:46 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:14:02 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:14:02 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:14:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:14:02 | D | - sum error = [11539.3414, 11228.3156, 10942.9846, 10701.0018, 10488.7795] +25-08-28 08:14:02 | D | - best error = [11539.3414, 11228.3156, 10942.9846, 10701.0018, 10488.7795] +25-08-28 08:14:02 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:14:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:14:02 | D | - sum error = [10298.1913, 10157.6025, 10042.2695, 9960.0867, 9913.8905] +25-08-28 08:14:02 | D | - best error = [10298.1913, 10157.6025, 10042.2695, 9960.0867, 9913.8905] +25-08-28 08:14:02 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:14:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:14:02 | D | - sum error = [ 9887.4357, 9904.0275, 9937.9499, 10015.9161, 10118.7292] +25-08-28 08:14:02 | D | - best error = [ 9887.4357, 9887.4357, 9887.4357, 9887.4357, 9887.4357] +25-08-28 08:14:02 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:14:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:14:02 | D | - sum error = [10264.1473, 10431.3673, 10647.3700, 10923.8363, 11259.7389] +25-08-28 08:14:02 | D | - best error = [ 9887.4357, 9887.4357, 9887.4357, 9887.4357, 9887.4357] +25-08-28 08:14:02 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:14:02 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:14:02 | D | - sum error = [12685.8239, 12191.5111, 11776.9116, 11398.3655, 11091.8972] +25-08-28 08:14:02 | D | - best error = [ 9887.4357, 9887.4357, 9887.4357, 9887.4357, 9887.4357] +25-08-28 08:14:02 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:14:02 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:14:02 | D | - sum error = [10810.5715, 10574.7613, 10396.2666, 10251.0078, 10151.7530] +25-08-28 08:14:02 | D | - best error = [ 9887.4357, 9887.4357, 9887.4357, 9887.4357, 9887.4357] +25-08-28 08:14:02 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:14:02 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:14:02 | D | - sum error = [10098.1357, 10069.7388, 10087.3116, 10146.2415, 10264.9368] +25-08-28 08:14:02 | D | - best error = [ 9887.4357, 9887.4357, 9887.4357, 9887.4357, 9887.4357] +25-08-28 08:14:02 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:14:02 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:14:02 | D | - sum error = [10407.0949, 10614.9227, 10874.5402, 11214.7913] +25-08-28 08:14:02 | D | - best error = [ 9887.4357, 9887.4357, 9887.4357, 9887.4357] +25-08-28 08:14:02 | D | + error = 9887.4357 +25-08-28 08:14:02 | D | + scale = [min=0.2733, max=3.8649] +25-08-28 08:14:02 | D | - transformer_blocks.7.ff.down_proj +25-08-28 08:14:02 | D | + w: sint4 +25-08-28 08:14:02 | D | + x: uint4 +25-08-28 08:14:02 | D | + y: None +25-08-28 08:14:02 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:14:02 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:14:05 | D | + x - AbsMax +25-08-28 08:14:05 | D | + x = [min=0.1719, max=57.2500] +25-08-28 08:14:05 | D | + w - AbsMax +25-08-28 08:14:05 | D | + w = [min=0.1045, max=0.9414] +25-08-28 08:14:05 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:14:07 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:15:58 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:15:58 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:15:58 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:15:58 | D | - sum error = [ 8663.1385, 8634.6724, 8666.6778, 8663.8883, 8712.0615] +25-08-28 08:15:58 | D | - best error = [ 8663.1385, 8634.6724, 8634.6724, 8634.6724, 8634.6724] +25-08-28 08:15:58 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:15:58 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:15:58 | D | - sum error = [ 8760.7173, 8824.2484, 8881.4756, 9046.5330, 9206.9157] +25-08-28 08:15:58 | D | - best error = [ 8634.6724, 8634.6724, 8634.6724, 8634.6724, 8634.6724] +25-08-28 08:15:58 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:15:58 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:15:58 | D | - sum error = [ 9381.5129, 9643.1211, 9881.2955, 10341.2986, 10781.8766] +25-08-28 08:15:58 | D | - best error = [ 8634.6724, 8634.6724, 8634.6724, 8634.6724, 8634.6724] +25-08-28 08:15:58 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:15:58 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:15:58 | D | - sum error = [11273.3555, 12382.7661, 14154.5888, 16385.4882, 19608.5394] +25-08-28 08:15:58 | D | - best error = [ 8634.6724, 8634.6724, 8634.6724, 8634.6724, 8634.6724] +25-08-28 08:15:58 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:15:58 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:15:58 | D | - sum error = [ 9714.3284, 9537.9876, 9383.5456, 9179.4427, 9064.8654] +25-08-28 08:15:58 | D | - best error = [ 8634.6724, 8634.6724, 8634.6724, 8634.6724, 8634.6724] +25-08-28 08:15:58 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:15:58 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:15:58 | D | - sum error = [ 9022.4759, 8992.7193, 8960.3554, 9052.1056, 9160.2367] +25-08-28 08:15:58 | D | - best error = [ 8634.6724, 8634.6724, 8634.6724, 8634.6724, 8634.6724] +25-08-28 08:15:58 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:15:58 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:15:58 | D | - sum error = [ 9345.1720, 9597.3971, 10010.5273, 10462.0510, 10896.6747] +25-08-28 08:15:58 | D | - best error = [ 8634.6724, 8634.6724, 8634.6724, 8634.6724, 8634.6724] +25-08-28 08:15:58 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:15:58 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:15:58 | D | - sum error = [11834.7245, 13593.6879, 15908.3470, 19226.0914] +25-08-28 08:15:58 | D | - best error = [ 8634.6724, 8634.6724, 8634.6724, 8634.6724] +25-08-28 08:15:58 | D | + error = 8634.6724 +25-08-28 08:15:58 | D | + scale = [min=0.9157, max=1.2243] +25-08-28 08:15:58 | D | - transformer_blocks.7.ff_context.up_proj +25-08-28 08:15:58 | D | + w: sint4 +25-08-28 08:15:58 | D | + x: sint4 +25-08-28 08:15:58 | D | + y: None +25-08-28 08:15:58 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:15:58 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:15:58 | D | + x - AbsMax +25-08-28 08:15:58 | D | + x = [min=0.1582, max=8.7500] +25-08-28 08:15:58 | D | + w - AbsMax +25-08-28 08:15:58 | D | + w = [min=0.0786, max=0.5391] +25-08-28 08:15:58 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:15:59 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:16:43 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:16:43 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:16:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:16:43 | D | - sum error = [ 5461.2169, 5239.5158, 4995.7040, 4793.6946, 4610.9654] +25-08-28 08:16:43 | D | - best error = [ 5461.2169, 5239.5158, 4995.7040, 4793.6946, 4610.9654] +25-08-28 08:16:43 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:16:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:16:43 | D | - sum error = [ 4454.8419, 4325.9979, 4203.2609, 4093.7236, 3994.5995] +25-08-28 08:16:43 | D | - best error = [ 4454.8419, 4325.9979, 4203.2609, 4093.7236, 3994.5995] +25-08-28 08:16:43 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:16:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:16:43 | D | - sum error = [ 3913.6110, 3848.1468, 3815.3168, 3796.6734, 3794.6002] +25-08-28 08:16:43 | D | - best error = [ 3913.6110, 3848.1468, 3815.3168, 3796.6734, 3794.6002] +25-08-28 08:16:43 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:16:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:16:43 | D | - sum error = [ 3808.3330, 3833.2915, 3881.5501, 3951.5896, 4022.6618] +25-08-28 08:16:43 | D | - best error = [ 3794.6002, 3794.6002, 3794.6002, 3794.6002, 3794.6002] +25-08-28 08:16:43 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:16:43 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:16:43 | D | - sum error = [ 4461.3455, 4294.1756, 4151.7823, 4018.7611, 3930.7858] +25-08-28 08:16:43 | D | - best error = [ 3794.6002, 3794.6002, 3794.6002, 3794.6002, 3794.6002] +25-08-28 08:16:43 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:16:43 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:16:43 | D | - sum error = [ 3841.2192, 3771.9087, 3708.1788, 3675.5941, 3641.8413] +25-08-28 08:16:43 | D | - best error = [ 3794.6002, 3771.9087, 3708.1788, 3675.5941, 3641.8413] +25-08-28 08:16:43 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:16:43 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:16:43 | D | - sum error = [ 3638.1734, 3643.4872, 3646.7671, 3669.9170, 3700.4581] +25-08-28 08:16:43 | D | - best error = [ 3638.1734, 3638.1734, 3638.1734, 3638.1734, 3638.1734] +25-08-28 08:16:43 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:16:43 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:16:43 | D | - sum error = [ 3763.6501, 3842.6327, 3919.9300, 4005.7039] +25-08-28 08:16:43 | D | - best error = [ 3638.1734, 3638.1734, 3638.1734, 3638.1734] +25-08-28 08:16:43 | D | + error = 3638.1734 +25-08-28 08:16:43 | D | + scale = [min=0.7106, max=8.9314] +25-08-28 08:16:43 | D | - transformer_blocks.7.ff_context.down_proj +25-08-28 08:16:43 | D | + w: sint4 +25-08-28 08:16:43 | D | + x: uint4 +25-08-28 08:16:43 | D | + y: None +25-08-28 08:16:43 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:16:43 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:16:43 | D | + x - AbsMax +25-08-28 08:16:43 | D | + x = [min=0.1719, max=45.5000] +25-08-28 08:16:43 | D | + w - AbsMax +25-08-28 08:16:43 | D | + w = [min=0.0211, max=0.5859] +25-08-28 08:16:43 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:16:44 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:17:32 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:17:32 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:17:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:17:32 | D | - sum error = [ 1769.0574, 1768.8658, 1814.4959, 1948.2143, 2237.2931] +25-08-28 08:17:32 | D | - best error = [ 1769.0574, 1768.8658, 1768.8658, 1768.8658, 1768.8658] +25-08-28 08:17:32 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:17:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:17:32 | D | - sum error = [ 2653.8924, 3289.0862, 4080.8125, 4862.6777, 5695.1272] +25-08-28 08:17:32 | D | - best error = [ 1768.8658, 1768.8658, 1768.8658, 1768.8658, 1768.8658] +25-08-28 08:17:32 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:17:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:17:32 | D | - sum error = [ 6571.2857, 7512.3067, 8611.9721, 9727.8273, 11131.6029] +25-08-28 08:17:32 | D | - best error = [ 1768.8658, 1768.8658, 1768.8658, 1768.8658, 1768.8658] +25-08-28 08:17:32 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:17:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:17:32 | D | - sum error = [12381.0365, 13608.1546, 14904.4262, 15776.6491, 16713.7232] +25-08-28 08:17:32 | D | - best error = [ 1768.8658, 1768.8658, 1768.8658, 1768.8658, 1768.8658] +25-08-28 08:17:32 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:17:32 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:17:32 | D | - sum error = [ 1719.3360, 1708.3894, 1736.9823, 1847.9221, 2013.9618] +25-08-28 08:17:32 | D | - best error = [ 1719.3360, 1708.3894, 1708.3894, 1708.3894, 1708.3894] +25-08-28 08:17:32 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:17:32 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:17:32 | D | - sum error = [ 2202.9850, 2504.6181, 2867.2591, 3473.9760, 4399.4736] +25-08-28 08:17:32 | D | - best error = [ 1708.3894, 1708.3894, 1708.3894, 1708.3894, 1708.3894] +25-08-28 08:17:32 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:17:32 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:17:32 | D | - sum error = [ 5732.1982, 7500.1376, 9179.1699, 10762.9427, 12329.4230] +25-08-28 08:17:32 | D | - best error = [ 1708.3894, 1708.3894, 1708.3894, 1708.3894, 1708.3894] +25-08-28 08:17:32 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:17:32 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:17:32 | D | - sum error = [13648.9742, 14852.2171, 15803.2093, 16763.0813] +25-08-28 08:17:32 | D | - best error = [ 1708.3894, 1708.3894, 1708.3894, 1708.3894] +25-08-28 08:17:32 | D | + error = 1708.3894 +25-08-28 08:17:32 | D | + scale = [min=1.3566, max=26.9979] +25-08-28 08:17:53 | D | - Smoothing Diffusion Block transformer_blocks.8 +25-08-28 08:17:53 | D | - Skipping Module transformer_blocks.8.norm1.linear +25-08-28 08:17:53 | D | - Skipping Module transformer_blocks.8.norm1_context.linear +25-08-28 08:17:53 | D | - Smoothing Transformer Block transformer_blocks.8 +25-08-28 08:17:53 | D | - transformer_blocks.8.attn.qkv_proj +25-08-28 08:17:53 | D | + w: sint4 +25-08-28 08:17:53 | D | + x: sint4 +25-08-28 08:17:53 | D | + y: None +25-08-28 08:17:53 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:17:53 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:17:53 | D | + x - AbsMax +25-08-28 08:17:53 | D | + x = [min=0.0452, max=15.0000] +25-08-28 08:17:53 | D | + w - AbsMax +25-08-28 08:17:53 | D | + w = [min=0.1123, max=0.6406] +25-08-28 08:17:53 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:17:55 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:19:08 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:19:08 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:19:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:19:08 | D | - sum error = [ 5943.7433, 5731.4054, 5607.9914, 5374.5534, 5199.1707] +25-08-28 08:19:08 | D | - best error = [ 5943.7433, 5731.4054, 5607.9914, 5374.5534, 5199.1707] +25-08-28 08:19:08 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:19:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:19:08 | D | - sum error = [ 5143.7902, 5145.5859, 5080.1913, 5026.1882, 5097.9060] +25-08-28 08:19:08 | D | - best error = [ 5143.7902, 5143.7902, 5080.1913, 5026.1882, 5026.1882] +25-08-28 08:19:08 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:19:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:19:08 | D | - sum error = [ 5106.4704, 5104.2175, 5105.3930, 5112.7175, 5066.8539] +25-08-28 08:19:08 | D | - best error = [ 5026.1882, 5026.1882, 5026.1882, 5026.1882, 5026.1882] +25-08-28 08:19:08 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:19:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:19:08 | D | - sum error = [ 5234.6413, 5327.0846, 5453.5477, 5702.4596, 5999.7817] +25-08-28 08:19:08 | D | - best error = [ 5026.1882, 5026.1882, 5026.1882, 5026.1882, 5026.1882] +25-08-28 08:19:08 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:19:08 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:19:08 | D | - sum error = [17757.0088, 15536.3754, 14014.0940, 12472.0141, 11359.3384] +25-08-28 08:19:08 | D | - best error = [ 5026.1882, 5026.1882, 5026.1882, 5026.1882, 5026.1882] +25-08-28 08:19:08 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:19:08 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:19:08 | D | - sum error = [10425.7220, 9495.0631, 8764.6184, 8303.8815, 7745.1030] +25-08-28 08:19:08 | D | - best error = [ 5026.1882, 5026.1882, 5026.1882, 5026.1882, 5026.1882] +25-08-28 08:19:08 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:19:08 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:19:08 | D | - sum error = [ 7483.4551, 7042.7983, 6805.6582, 6399.6925, 6106.3901] +25-08-28 08:19:08 | D | - best error = [ 5026.1882, 5026.1882, 5026.1882, 5026.1882, 5026.1882] +25-08-28 08:19:08 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:19:08 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:19:08 | D | - sum error = [ 6072.3445, 6075.3295, 6063.7190, 6212.6867] +25-08-28 08:19:08 | D | - best error = [ 5026.1882, 5026.1882, 5026.1882, 5026.1882] +25-08-28 08:19:08 | D | + error = 5026.1882 +25-08-28 08:19:08 | D | + scale = [min=0.2897, max=2.9542] +25-08-28 08:19:08 | D | - transformer_blocks.8.attn add_qkv_proj +25-08-28 08:19:08 | D | + w: sint4 +25-08-28 08:19:08 | D | + x: sint4 +25-08-28 08:19:08 | D | + y: None +25-08-28 08:19:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:19:08 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:19:08 | D | + x - AbsMax +25-08-28 08:19:08 | D | + x = [min=0.1060, max=19.1250] +25-08-28 08:19:08 | D | + w - AbsMax +25-08-28 08:19:08 | D | + w = [min=0.1030, max=0.4082] +25-08-28 08:19:08 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:19:09 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:20:14 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:20:14 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:20:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:20:14 | D | - sum error = [ 2000.7498, 1912.7688, 1787.2242, 1732.0513, 1661.2358] +25-08-28 08:20:14 | D | - best error = [ 2000.7498, 1912.7688, 1787.2242, 1732.0513, 1661.2358] +25-08-28 08:20:14 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:20:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:20:14 | D | - sum error = [ 1576.8863, 1520.5663, 1460.8694, 1439.9608, 1404.5649] +25-08-28 08:20:14 | D | - best error = [ 1576.8863, 1520.5663, 1460.8694, 1439.9608, 1404.5649] +25-08-28 08:20:14 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:20:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:20:14 | D | - sum error = [ 1369.6340, 1347.3964, 1353.5289, 1383.0456, 1392.5645] +25-08-28 08:20:14 | D | - best error = [ 1369.6340, 1347.3964, 1347.3964, 1347.3964, 1347.3964] +25-08-28 08:20:14 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:20:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:20:14 | D | - sum error = [ 1399.1230, 1410.6996, 1475.6852, 1576.6453, 1635.4631] +25-08-28 08:20:14 | D | - best error = [ 1347.3964, 1347.3964, 1347.3964, 1347.3964, 1347.3964] +25-08-28 08:20:14 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:20:14 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:20:14 | D | - sum error = [ 2435.6157, 2321.8195, 2241.9506, 2048.2274, 1939.2818] +25-08-28 08:20:14 | D | - best error = [ 1347.3964, 1347.3964, 1347.3964, 1347.3964, 1347.3964] +25-08-28 08:20:14 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:20:14 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:20:14 | D | - sum error = [ 1841.4748, 1744.7124, 1658.0483, 1627.4523, 1540.7922] +25-08-28 08:20:14 | D | - best error = [ 1347.3964, 1347.3964, 1347.3964, 1347.3964, 1347.3964] +25-08-28 08:20:14 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:20:14 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:20:14 | D | - sum error = [ 1484.9528, 1452.6283, 1407.1192, 1423.2942, 1449.9403] +25-08-28 08:20:14 | D | - best error = [ 1347.3964, 1347.3964, 1347.3964, 1347.3964, 1347.3964] +25-08-28 08:20:14 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:20:14 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:20:14 | D | - sum error = [ 1433.4681, 1470.3443, 1545.3752, 1641.5314] +25-08-28 08:20:14 | D | - best error = [ 1347.3964, 1347.3964, 1347.3964, 1347.3964] +25-08-28 08:20:14 | D | + error = 1347.3964 +25-08-28 08:20:14 | D | + scale = [min=0.2910, max=5.0685] +25-08-28 08:20:14 | D | - transformer_blocks.8.attn.out_proj + transformer_blocks.8.attn.add_out_proj +25-08-28 08:20:14 | D | + w: sint4 +25-08-28 08:20:14 | D | + x: sint4 +25-08-28 08:20:14 | D | + y: None +25-08-28 08:20:14 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:20:14 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:20:15 | D | + x - AbsMax +25-08-28 08:20:15 | D | + x = [min=1.2500, max=12.3125] +25-08-28 08:20:15 | D | + w - AbsMax +25-08-28 08:20:15 | D | + w = [min=0.1279, max=0.3164] +25-08-28 08:20:15 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:20:16 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:22:03 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:22:03 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:22:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:22:03 | D | - sum error = [ 5865.6413, 5839.2424, 5818.7559, 5803.5972, 5820.3550] +25-08-28 08:22:03 | D | - best error = [ 5865.6413, 5839.2424, 5818.7559, 5803.5972, 5803.5972] +25-08-28 08:22:03 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:22:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:22:03 | D | - sum error = [ 5814.1262, 5809.4451, 5832.1499, 5859.8334, 5892.3856] +25-08-28 08:22:03 | D | - best error = [ 5803.5972, 5803.5972, 5803.5972, 5803.5972, 5803.5972] +25-08-28 08:22:03 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:22:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:22:03 | D | - sum error = [ 5916.9857, 5956.5211, 6003.1755, 6052.4646, 6108.7387] +25-08-28 08:22:03 | D | - best error = [ 5803.5972, 5803.5972, 5803.5972, 5803.5972, 5803.5972] +25-08-28 08:22:03 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:22:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:22:03 | D | - sum error = [ 6177.2677, 6263.3557, 6329.6758, 6409.7438, 6498.5266] +25-08-28 08:22:03 | D | - best error = [ 5803.5972, 5803.5972, 5803.5972, 5803.5972, 5803.5972] +25-08-28 08:22:03 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:22:03 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:22:03 | D | - sum error = [ 6206.0055, 6132.9647, 6093.7552, 6048.7992, 6020.8976] +25-08-28 08:22:03 | D | - best error = [ 5803.5972, 5803.5972, 5803.5972, 5803.5972, 5803.5972] +25-08-28 08:22:03 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:22:03 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:22:03 | D | - sum error = [ 5984.4278, 5975.1733, 5969.1229, 5972.6858, 5988.5748] +25-08-28 08:22:03 | D | - best error = [ 5803.5972, 5803.5972, 5803.5972, 5803.5972, 5803.5972] +25-08-28 08:22:03 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:22:03 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:22:03 | D | - sum error = [ 6009.9111, 6030.8742, 6062.8254, 6106.9498, 6174.7198] +25-08-28 08:22:03 | D | - best error = [ 5803.5972, 5803.5972, 5803.5972, 5803.5972, 5803.5972] +25-08-28 08:22:03 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:22:03 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:22:03 | D | - sum error = [ 6244.0115, 6321.6115, 6390.8046, 6497.4694] +25-08-28 08:22:03 | D | - best error = [ 5803.5972, 5803.5972, 5803.5972, 5803.5972] +25-08-28 08:22:03 | D | + error = 5803.5972 +25-08-28 08:22:03 | D | + scale = [min=1.0340, max=1.4573] +25-08-28 08:22:03 | D | - transformer_blocks.8.ff.up_proj +25-08-28 08:22:03 | D | + w: sint4 +25-08-28 08:22:03 | D | + x: sint4 +25-08-28 08:22:03 | D | + y: None +25-08-28 08:22:03 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:22:03 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:22:04 | D | + x - AbsMax +25-08-28 08:22:04 | D | + x = [min=0.0752, max=14.7500] +25-08-28 08:22:04 | D | + w - AbsMax +25-08-28 08:22:04 | D | + w = [min=0.1113, max=0.5117] +25-08-28 08:22:04 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:22:05 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:23:22 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:23:22 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:23:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:23:22 | D | - sum error = [10485.7648, 10234.0362, 10024.8663, 9835.6958, 9674.0817] +25-08-28 08:23:22 | D | - best error = [10485.7648, 10234.0362, 10024.8663, 9835.6958, 9674.0817] +25-08-28 08:23:22 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:23:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:23:22 | D | - sum error = [ 9541.0607, 9425.4706, 9343.5291, 9285.0908, 9235.5764] +25-08-28 08:23:22 | D | - best error = [ 9541.0607, 9425.4706, 9343.5291, 9285.0908, 9235.5764] +25-08-28 08:23:22 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:23:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:23:22 | D | - sum error = [ 9221.6823, 9217.5871, 9243.9191, 9281.5999, 9339.8647] +25-08-28 08:23:22 | D | - best error = [ 9221.6823, 9217.5871, 9217.5871, 9217.5871, 9217.5871] +25-08-28 08:23:22 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:23:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:23:22 | D | - sum error = [ 9417.4120, 9497.4312, 9620.5621, 9773.4088, 9951.4136] +25-08-28 08:23:22 | D | - best error = [ 9217.5871, 9217.5871, 9217.5871, 9217.5871, 9217.5871] +25-08-28 08:23:22 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:23:22 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:23:22 | D | - sum error = [12644.6371, 12091.9643, 11596.6002, 11171.4747, 10785.6275] +25-08-28 08:23:22 | D | - best error = [ 9217.5871, 9217.5871, 9217.5871, 9217.5871, 9217.5871] +25-08-28 08:23:22 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:23:22 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:23:22 | D | - sum error = [10454.7914, 10185.6063, 9959.6197, 9775.4046, 9632.6478] +25-08-28 08:23:22 | D | - best error = [ 9217.5871, 9217.5871, 9217.5871, 9217.5871, 9217.5871] +25-08-28 08:23:22 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:23:22 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:23:22 | D | - sum error = [ 9546.5122, 9485.8788, 9458.8101, 9454.7948, 9480.3856] +25-08-28 08:23:22 | D | - best error = [ 9217.5871, 9217.5871, 9217.5871, 9217.5871, 9217.5871] +25-08-28 08:23:22 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:23:22 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:23:22 | D | - sum error = [ 9545.3193, 9624.5827, 9765.7246, 9943.3467] +25-08-28 08:23:22 | D | - best error = [ 9217.5871, 9217.5871, 9217.5871, 9217.5871] +25-08-28 08:23:22 | D | + error = 9217.5871 +25-08-28 08:23:22 | D | + scale = [min=0.2409, max=4.3938] +25-08-28 08:23:22 | D | - transformer_blocks.8.ff.down_proj +25-08-28 08:23:22 | D | + w: sint4 +25-08-28 08:23:22 | D | + x: uint4 +25-08-28 08:23:22 | D | + y: None +25-08-28 08:23:22 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:23:22 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:23:25 | D | + x - AbsMax +25-08-28 08:23:25 | D | + x = [min=0.1719, max=22.1250] +25-08-28 08:23:25 | D | + w - AbsMax +25-08-28 08:23:25 | D | + w = [min=0.0466, max=0.5547] +25-08-28 08:23:25 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:23:27 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:25:19 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:25:19 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:25:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:25:19 | D | - sum error = [ 9108.4343, 9082.4715, 9104.7109, 9113.1790, 9148.3913] +25-08-28 08:25:19 | D | - best error = [ 9108.4343, 9082.4715, 9082.4715, 9082.4715, 9082.4715] +25-08-28 08:25:19 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:25:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:25:19 | D | - sum error = [ 9180.4309, 9251.0691, 9357.3287, 9492.5908, 9618.5999] +25-08-28 08:25:19 | D | - best error = [ 9082.4715, 9082.4715, 9082.4715, 9082.4715, 9082.4715] +25-08-28 08:25:19 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:25:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:25:19 | D | - sum error = [ 9826.3601, 10112.0320, 10434.0757, 10787.5150, 11334.0506] +25-08-28 08:25:19 | D | - best error = [ 9082.4715, 9082.4715, 9082.4715, 9082.4715, 9082.4715] +25-08-28 08:25:19 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:25:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:25:19 | D | - sum error = [11978.2917, 13143.9909, 14753.9657, 16795.8500, 20163.0712] +25-08-28 08:25:19 | D | - best error = [ 9082.4715, 9082.4715, 9082.4715, 9082.4715, 9082.4715] +25-08-28 08:25:19 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:25:19 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:25:19 | D | - sum error = [ 9978.9294, 9814.1372, 9647.5871, 9559.6774, 9453.8107] +25-08-28 08:25:19 | D | - best error = [ 9082.4715, 9082.4715, 9082.4715, 9082.4715, 9082.4715] +25-08-28 08:25:19 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:25:19 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:25:19 | D | - sum error = [ 9399.8791, 9365.4958, 9418.7078, 9492.6317, 9557.6566] +25-08-28 08:25:19 | D | - best error = [ 9082.4715, 9082.4715, 9082.4715, 9082.4715, 9082.4715] +25-08-28 08:25:19 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:25:19 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:25:19 | D | - sum error = [ 9756.1857, 10037.8942, 10443.7648, 10930.7281, 11441.8887] +25-08-28 08:25:19 | D | - best error = [ 9082.4715, 9082.4715, 9082.4715, 9082.4715, 9082.4715] +25-08-28 08:25:19 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:25:19 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:25:19 | D | - sum error = [12429.6459, 14234.3438, 16294.6342, 19795.3530] +25-08-28 08:25:19 | D | - best error = [ 9082.4715, 9082.4715, 9082.4715, 9082.4715] +25-08-28 08:25:19 | D | + error = 9082.4715 +25-08-28 08:25:19 | D | + scale = [min=0.9157, max=1.1675] +25-08-28 08:25:19 | D | - transformer_blocks.8.ff_context.up_proj +25-08-28 08:25:19 | D | + w: sint4 +25-08-28 08:25:19 | D | + x: sint4 +25-08-28 08:25:19 | D | + y: None +25-08-28 08:25:19 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:25:19 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:25:19 | D | + x - AbsMax +25-08-28 08:25:19 | D | + x = [min=0.1777, max=15.0000] +25-08-28 08:25:19 | D | + w - AbsMax +25-08-28 08:25:19 | D | + w = [min=0.0820, max=0.4922] +25-08-28 08:25:19 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:25:19 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:26:03 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:26:03 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:26:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:26:03 | D | - sum error = [ 6034.6788, 5779.0065, 5546.2986, 5306.5017, 5103.2927] +25-08-28 08:26:03 | D | - best error = [ 6034.6788, 5779.0065, 5546.2986, 5306.5017, 5103.2927] +25-08-28 08:26:03 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:26:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:26:03 | D | - sum error = [ 4917.0499, 4750.9894, 4595.7654, 4479.3343, 4376.4017] +25-08-28 08:26:03 | D | - best error = [ 4917.0499, 4750.9894, 4595.7654, 4479.3343, 4376.4017] +25-08-28 08:26:03 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:26:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:26:03 | D | - sum error = [ 4312.3339, 4271.0299, 4215.2101, 4186.6815, 4193.6254] +25-08-28 08:26:03 | D | - best error = [ 4312.3339, 4271.0299, 4215.2101, 4186.6815, 4186.6815] +25-08-28 08:26:03 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:26:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:26:03 | D | - sum error = [ 4223.8996, 4277.8102, 4356.3563, 4470.4163, 4595.3056] +25-08-28 08:26:03 | D | - best error = [ 4186.6815, 4186.6815, 4186.6815, 4186.6815, 4186.6815] +25-08-28 08:26:03 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:26:03 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:26:03 | D | - sum error = [ 5127.6884, 4891.2066, 4738.9953, 4569.1896, 4425.0971] +25-08-28 08:26:03 | D | - best error = [ 4186.6815, 4186.6815, 4186.6815, 4186.6815, 4186.6815] +25-08-28 08:26:03 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:26:03 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:26:03 | D | - sum error = [ 4289.8818, 4198.7403, 4145.4090, 4087.4525, 4043.5123] +25-08-28 08:26:03 | D | - best error = [ 4186.6815, 4186.6815, 4145.4090, 4087.4525, 4043.5123] +25-08-28 08:26:03 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:26:03 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:26:03 | D | - sum error = [ 4031.2286, 4014.5637, 4034.3213, 4057.1078, 4110.4961] +25-08-28 08:26:03 | D | - best error = [ 4031.2286, 4014.5637, 4014.5637, 4014.5637, 4014.5637] +25-08-28 08:26:03 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:26:03 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:26:03 | D | - sum error = [ 4178.4090, 4276.8292, 4432.8977, 4562.7042] +25-08-28 08:26:03 | D | - best error = [ 4014.5637, 4014.5637, 4014.5637, 4014.5637] +25-08-28 08:26:03 | D | + error = 4014.5637 +25-08-28 08:26:03 | D | + scale = [min=0.7122, max=13.2267] +25-08-28 08:26:03 | D | - transformer_blocks.8.ff_context.down_proj +25-08-28 08:26:03 | D | + w: sint4 +25-08-28 08:26:03 | D | + x: uint4 +25-08-28 08:26:03 | D | + y: None +25-08-28 08:26:03 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:26:03 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:26:04 | D | + x - AbsMax +25-08-28 08:26:04 | D | + x = [min=0.1719, max=52.2500] +25-08-28 08:26:04 | D | + w - AbsMax +25-08-28 08:26:04 | D | + w = [min=0.0195, max=0.4414] +25-08-28 08:26:04 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:26:04 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:26:52 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:26:52 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:26:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:26:52 | D | - sum error = [ 1751.5989, 1754.2021, 1800.9517, 2011.6233, 2268.0593] +25-08-28 08:26:52 | D | - best error = [ 1751.5989, 1751.5989, 1751.5989, 1751.5989, 1751.5989] +25-08-28 08:26:52 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:26:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:26:52 | D | - sum error = [ 2792.7783, 3351.9384, 3950.8302, 4595.9741, 5300.2589] +25-08-28 08:26:52 | D | - best error = [ 1751.5989, 1751.5989, 1751.5989, 1751.5989, 1751.5989] +25-08-28 08:26:52 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:26:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:26:52 | D | - sum error = [ 6103.2090, 6880.1709, 7915.0603, 9098.8865, 10891.3044] +25-08-28 08:26:52 | D | - best error = [ 1751.5989, 1751.5989, 1751.5989, 1751.5989, 1751.5989] +25-08-28 08:26:52 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:26:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:26:52 | D | - sum error = [12778.9064, 14693.7004, 16425.0180, 17566.9162, 18521.9481] +25-08-28 08:26:52 | D | - best error = [ 1751.5989, 1751.5989, 1751.5989, 1751.5989, 1751.5989] +25-08-28 08:26:52 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:26:52 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:26:52 | D | - sum error = [ 1695.3789, 1711.3509, 1754.6832, 1880.5796, 2035.2978] +25-08-28 08:26:52 | D | - best error = [ 1695.3789, 1695.3789, 1695.3789, 1695.3789, 1695.3789] +25-08-28 08:26:52 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:26:52 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:26:52 | D | - sum error = [ 2286.3536, 2570.8948, 3022.5933, 3601.7783, 4528.1405] +25-08-28 08:26:52 | D | - best error = [ 1695.3789, 1695.3789, 1695.3789, 1695.3789, 1695.3789] +25-08-28 08:26:52 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:26:52 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:26:52 | D | - sum error = [ 5839.2705, 7218.3935, 8827.6303, 10694.9330, 12770.0393] +25-08-28 08:26:52 | D | - best error = [ 1695.3789, 1695.3789, 1695.3789, 1695.3789, 1695.3789] +25-08-28 08:26:52 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:26:52 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:26:52 | D | - sum error = [14693.3678, 16380.7790, 17553.6508, 18434.5059] +25-08-28 08:26:52 | D | - best error = [ 1695.3789, 1695.3789, 1695.3789, 1695.3789] +25-08-28 08:26:52 | D | + error = 1695.3789 +25-08-28 08:26:52 | D | + scale = [min=2.1747, max=38.5094] +25-08-28 08:27:12 | D | - Smoothing Diffusion Block transformer_blocks.9 +25-08-28 08:27:12 | D | - Skipping Module transformer_blocks.9.norm1.linear +25-08-28 08:27:12 | D | - Skipping Module transformer_blocks.9.norm1_context.linear +25-08-28 08:27:12 | D | - Smoothing Transformer Block transformer_blocks.9 +25-08-28 08:27:12 | D | - transformer_blocks.9.attn.qkv_proj +25-08-28 08:27:12 | D | + w: sint4 +25-08-28 08:27:12 | D | + x: sint4 +25-08-28 08:27:12 | D | + y: None +25-08-28 08:27:12 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:27:12 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:27:12 | D | + x - AbsMax +25-08-28 08:27:12 | D | + x = [min=0.0894, max=16.2500] +25-08-28 08:27:12 | D | + w - AbsMax +25-08-28 08:27:12 | D | + w = [min=0.1099, max=0.4629] +25-08-28 08:27:12 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:27:13 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:28:27 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:28:27 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:28:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:28:27 | D | - sum error = [ 6241.2403, 5958.3394, 5807.0454, 5644.7949, 5477.5856] +25-08-28 08:28:27 | D | - best error = [ 6241.2403, 5958.3394, 5807.0454, 5644.7949, 5477.5856] +25-08-28 08:28:27 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:28:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:28:27 | D | - sum error = [ 5369.3314, 5224.7942, 5181.0748, 5187.1840, 5114.3690] +25-08-28 08:28:27 | D | - best error = [ 5369.3314, 5224.7942, 5181.0748, 5181.0748, 5114.3690] +25-08-28 08:28:27 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:28:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:28:27 | D | - sum error = [ 5092.7393, 5153.2871, 5218.2304, 5275.2674, 5344.1645] +25-08-28 08:28:27 | D | - best error = [ 5092.7393, 5092.7393, 5092.7393, 5092.7393, 5092.7393] +25-08-28 08:28:27 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:28:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:28:27 | D | - sum error = [ 5469.1279, 5571.9435, 5689.5067, 5817.6604, 5969.6087] +25-08-28 08:28:27 | D | - best error = [ 5092.7393, 5092.7393, 5092.7393, 5092.7393, 5092.7393] +25-08-28 08:28:27 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:28:27 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:28:27 | D | - sum error = [ 8747.1559, 8136.8817, 7508.4502, 7071.6527, 6686.3243] +25-08-28 08:28:27 | D | - best error = [ 5092.7393, 5092.7393, 5092.7393, 5092.7393, 5092.7393] +25-08-28 08:28:27 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:28:27 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:28:27 | D | - sum error = [ 6413.1493, 6167.3498, 5922.7284, 5769.6373, 5622.8637] +25-08-28 08:28:27 | D | - best error = [ 5092.7393, 5092.7393, 5092.7393, 5092.7393, 5092.7393] +25-08-28 08:28:27 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:28:27 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:28:27 | D | - sum error = [ 5511.8770, 5507.5212, 5519.2777, 5595.8277, 5614.3741] +25-08-28 08:28:27 | D | - best error = [ 5092.7393, 5092.7393, 5092.7393, 5092.7393, 5092.7393] +25-08-28 08:28:27 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:28:27 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:28:27 | D | - sum error = [ 5674.1362, 5743.9549, 5844.9647, 6007.8778] +25-08-28 08:28:27 | D | - best error = [ 5092.7393, 5092.7393, 5092.7393, 5092.7393] +25-08-28 08:28:27 | D | + error = 5092.7393 +25-08-28 08:28:27 | D | + scale = [min=0.2989, max=4.0311] +25-08-28 08:28:27 | D | - transformer_blocks.9.attn add_qkv_proj +25-08-28 08:28:27 | D | + w: sint4 +25-08-28 08:28:27 | D | + x: sint4 +25-08-28 08:28:27 | D | + y: None +25-08-28 08:28:27 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:28:27 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:28:27 | D | + x - AbsMax +25-08-28 08:28:27 | D | + x = [min=0.1553, max=20.3750] +25-08-28 08:28:27 | D | + w - AbsMax +25-08-28 08:28:27 | D | + w = [min=0.1177, max=0.4414] +25-08-28 08:28:27 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:28:28 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:29:33 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:29:33 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:29:33 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:29:33 | D | - sum error = [ 1907.0585, 1829.1492, 1747.7762, 1656.3233, 1592.0638] +25-08-28 08:29:33 | D | - best error = [ 1907.0585, 1829.1492, 1747.7762, 1656.3233, 1592.0638] +25-08-28 08:29:33 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:29:33 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:29:33 | D | - sum error = [ 1540.3253, 1481.8149, 1430.4819, 1401.9049, 1354.3172] +25-08-28 08:29:33 | D | - best error = [ 1540.3253, 1481.8149, 1430.4819, 1401.9049, 1354.3172] +25-08-28 08:29:33 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:29:33 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:29:33 | D | - sum error = [ 1340.8555, 1302.9574, 1288.0506, 1277.9248, 1299.3279] +25-08-28 08:29:33 | D | - best error = [ 1340.8555, 1302.9574, 1288.0506, 1277.9248, 1277.9248] +25-08-28 08:29:33 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:29:33 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:29:33 | D | - sum error = [ 1319.7075, 1363.3090, 1410.0713, 1446.9329, 1512.6143] +25-08-28 08:29:33 | D | - best error = [ 1277.9248, 1277.9248, 1277.9248, 1277.9248, 1277.9248] +25-08-28 08:29:33 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:29:33 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:29:33 | D | - sum error = [ 2279.1537, 2162.9632, 2035.8761, 1907.6706, 1818.3519] +25-08-28 08:29:33 | D | - best error = [ 1277.9248, 1277.9248, 1277.9248, 1277.9248, 1277.9248] +25-08-28 08:29:33 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:29:33 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:29:33 | D | - sum error = [ 1744.0107, 1657.4522, 1607.7516, 1519.8769, 1452.8167] +25-08-28 08:29:33 | D | - best error = [ 1277.9248, 1277.9248, 1277.9248, 1277.9248, 1277.9248] +25-08-28 08:29:33 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:29:33 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:29:33 | D | - sum error = [ 1415.5189, 1371.2549, 1333.1780, 1325.6870, 1346.1075] +25-08-28 08:29:33 | D | - best error = [ 1277.9248, 1277.9248, 1277.9248, 1277.9248, 1277.9248] +25-08-28 08:29:33 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:29:33 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:29:33 | D | - sum error = [ 1378.4365, 1407.3608, 1446.5555, 1511.2787] +25-08-28 08:29:33 | D | - best error = [ 1277.9248, 1277.9248, 1277.9248, 1277.9248] +25-08-28 08:29:33 | D | + error = 1277.9248 +25-08-28 08:29:33 | D | + scale = [min=0.2980, max=7.0944] +25-08-28 08:29:33 | D | - transformer_blocks.9.attn.out_proj + transformer_blocks.9.attn.add_out_proj +25-08-28 08:29:33 | D | + w: sint4 +25-08-28 08:29:33 | D | + x: sint4 +25-08-28 08:29:33 | D | + y: None +25-08-28 08:29:33 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:29:33 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:29:34 | D | + x - AbsMax +25-08-28 08:29:34 | D | + x = [min=2.4375, max=15.8750] +25-08-28 08:29:34 | D | + w - AbsMax +25-08-28 08:29:34 | D | + w = [min=0.1260, max=0.3164] +25-08-28 08:29:34 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:29:35 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:31:24 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:31:24 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:31:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:31:24 | D | - sum error = [ 8378.3106, 8338.7615, 8298.4760, 8258.5500, 8228.5877] +25-08-28 08:31:24 | D | - best error = [ 8378.3106, 8338.7615, 8298.4760, 8258.5500, 8228.5877] +25-08-28 08:31:24 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:31:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:31:24 | D | - sum error = [ 8209.8794, 8219.7119, 8216.6684, 8234.2514, 8249.9042] +25-08-28 08:31:24 | D | - best error = [ 8209.8794, 8209.8794, 8209.8794, 8209.8794, 8209.8794] +25-08-28 08:31:24 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:31:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:31:24 | D | - sum error = [ 8279.3893, 8320.9471, 8359.6072, 8424.3799, 8486.1515] +25-08-28 08:31:24 | D | - best error = [ 8209.8794, 8209.8794, 8209.8794, 8209.8794, 8209.8794] +25-08-28 08:31:24 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:31:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:31:24 | D | - sum error = [ 8562.0572, 8653.4867, 8772.7925, 8867.4416, 8970.8301] +25-08-28 08:31:24 | D | - best error = [ 8209.8794, 8209.8794, 8209.8794, 8209.8794, 8209.8794] +25-08-28 08:31:24 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:31:24 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:31:24 | D | - sum error = [ 9065.2241, 8922.0410, 8794.6375, 8707.3192, 8623.9620] +25-08-28 08:31:24 | D | - best error = [ 8209.8794, 8209.8794, 8209.8794, 8209.8794, 8209.8794] +25-08-28 08:31:24 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:31:24 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:31:24 | D | - sum error = [ 8548.4937, 8504.5621, 8466.5242, 8452.6204, 8455.6140] +25-08-28 08:31:24 | D | - best error = [ 8209.8794, 8209.8794, 8209.8794, 8209.8794, 8209.8794] +25-08-28 08:31:24 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:31:24 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:31:24 | D | - sum error = [ 8477.0995, 8460.1754, 8501.4098, 8544.4513, 8612.9400] +25-08-28 08:31:24 | D | - best error = [ 8209.8794, 8209.8794, 8209.8794, 8209.8794, 8209.8794] +25-08-28 08:31:24 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:31:24 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:31:24 | D | - sum error = [ 8688.5396, 8774.8199, 8867.8827, 8965.4956] +25-08-28 08:31:24 | D | - best error = [ 8209.8794, 8209.8794, 8209.8794, 8209.8794] +25-08-28 08:31:24 | D | + error = 8209.8794 +25-08-28 08:31:24 | D | + scale = [min=1.2495, max=1.9961] +25-08-28 08:31:24 | D | - transformer_blocks.9.ff.up_proj +25-08-28 08:31:24 | D | + w: sint4 +25-08-28 08:31:24 | D | + x: sint4 +25-08-28 08:31:24 | D | + y: None +25-08-28 08:31:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:31:24 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:31:25 | D | + x - AbsMax +25-08-28 08:31:25 | D | + x = [min=0.0728, max=11.9375] +25-08-28 08:31:25 | D | + w - AbsMax +25-08-28 08:31:25 | D | + w = [min=0.1191, max=0.5859] +25-08-28 08:31:25 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:31:26 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:32:43 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:32:43 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:32:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:32:43 | D | - sum error = [11514.7846, 11202.9635, 10939.8526, 10717.9722, 10523.4990] +25-08-28 08:32:43 | D | - best error = [11514.7846, 11202.9635, 10939.8526, 10717.9722, 10523.4990] +25-08-28 08:32:43 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:32:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:32:43 | D | - sum error = [10349.6779, 10221.2175, 10111.4668, 10032.7556, 9977.7769] +25-08-28 08:32:43 | D | - best error = [10349.6779, 10221.2175, 10111.4668, 10032.7556, 9977.7769] +25-08-28 08:32:43 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:32:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:32:43 | D | - sum error = [ 9963.0192, 9963.1919, 9993.1545, 10041.8907, 10126.3589] +25-08-28 08:32:43 | D | - best error = [ 9963.0192, 9963.0192, 9963.0192, 9963.0192, 9963.0192] +25-08-28 08:32:43 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:32:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:32:43 | D | - sum error = [10238.3792, 10357.5557, 10520.7829, 10718.3070, 10943.8406] +25-08-28 08:32:43 | D | - best error = [ 9963.0192, 9963.0192, 9963.0192, 9963.0192, 9963.0192] +25-08-28 08:32:43 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:32:43 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:32:43 | D | - sum error = [13900.4057, 13249.3683, 12668.1048, 12150.4225, 11707.7629] +25-08-28 08:32:43 | D | - best error = [ 9963.0192, 9963.0192, 9963.0192, 9963.0192, 9963.0192] +25-08-28 08:32:43 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:32:43 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:32:43 | D | - sum error = [11328.1561, 11004.5620, 10741.4014, 10530.3892, 10356.2198] +25-08-28 08:32:43 | D | - best error = [ 9963.0192, 9963.0192, 9963.0192, 9963.0192, 9963.0192] +25-08-28 08:32:43 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:32:43 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:32:43 | D | - sum error = [10246.8368, 10171.2481, 10148.2617, 10167.0861, 10234.4907] +25-08-28 08:32:43 | D | - best error = [ 9963.0192, 9963.0192, 9963.0192, 9963.0192, 9963.0192] +25-08-28 08:32:43 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:32:43 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:32:43 | D | - sum error = [10342.8058, 10498.1659, 10677.9639, 10920.8666] +25-08-28 08:32:43 | D | - best error = [ 9963.0192, 9963.0192, 9963.0192, 9963.0192] +25-08-28 08:32:43 | D | + error = 9963.0192 +25-08-28 08:32:43 | D | + scale = [min=0.2697, max=3.4551] +25-08-28 08:32:43 | D | - transformer_blocks.9.ff.down_proj +25-08-28 08:32:43 | D | + w: sint4 +25-08-28 08:32:43 | D | + x: uint4 +25-08-28 08:32:43 | D | + y: None +25-08-28 08:32:43 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:32:43 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:32:46 | D | + x - AbsMax +25-08-28 08:32:46 | D | + x = [min=0.1719, max=15.1875] +25-08-28 08:32:46 | D | + w - AbsMax +25-08-28 08:32:46 | D | + w = [min=0.0669, max=0.6406] +25-08-28 08:32:46 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:32:48 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:34:39 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:34:39 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:34:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:34:39 | D | - sum error = [ 9479.3814, 9427.7828, 9422.3628, 9443.0050, 9449.9812] +25-08-28 08:34:39 | D | - best error = [ 9479.3814, 9427.7828, 9422.3628, 9422.3628, 9422.3628] +25-08-28 08:34:39 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:34:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:34:39 | D | - sum error = [ 9543.6039, 9644.5126, 9735.1335, 9840.8109, 9999.1776] +25-08-28 08:34:39 | D | - best error = [ 9422.3628, 9422.3628, 9422.3628, 9422.3628, 9422.3628] +25-08-28 08:34:39 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:34:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:34:39 | D | - sum error = [10206.0617, 10457.1231, 10794.7517, 11269.8635, 11818.9888] +25-08-28 08:34:39 | D | - best error = [ 9422.3628, 9422.3628, 9422.3628, 9422.3628, 9422.3628] +25-08-28 08:34:39 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:34:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:34:39 | D | - sum error = [12428.3360, 13339.1820, 14934.4477, 16964.7503, 19917.8778] +25-08-28 08:34:39 | D | - best error = [ 9422.3628, 9422.3628, 9422.3628, 9422.3628, 9422.3628] +25-08-28 08:34:39 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:34:39 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:34:39 | D | - sum error = [10560.9617, 10346.6284, 10176.3145, 10041.7596, 9904.6179] +25-08-28 08:34:39 | D | - best error = [ 9422.3628, 9422.3628, 9422.3628, 9422.3628, 9422.3628] +25-08-28 08:34:39 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:34:39 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:34:39 | D | - sum error = [ 9796.9011, 9747.9032, 9814.2761, 9797.5862, 9881.1809] +25-08-28 08:34:39 | D | - best error = [ 9422.3628, 9422.3628, 9422.3628, 9422.3628, 9422.3628] +25-08-28 08:34:39 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:34:39 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:34:39 | D | - sum error = [10029.1344, 10302.2281, 10733.6272, 11262.2033, 11895.1802] +25-08-28 08:34:39 | D | - best error = [ 9422.3628, 9422.3628, 9422.3628, 9422.3628, 9422.3628] +25-08-28 08:34:39 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:34:39 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:34:39 | D | - sum error = [12698.8692, 14232.2428, 16464.7250, 19641.9282] +25-08-28 08:34:39 | D | - best error = [ 9422.3628, 9422.3628, 9422.3628, 9422.3628] +25-08-28 08:34:39 | D | + error = 9422.3628 +25-08-28 08:34:39 | D | + scale = [min=0.8385, max=1.3126] +25-08-28 08:34:39 | D | - transformer_blocks.9.ff_context.up_proj +25-08-28 08:34:39 | D | + w: sint4 +25-08-28 08:34:39 | D | + x: sint4 +25-08-28 08:34:39 | D | + y: None +25-08-28 08:34:39 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:34:39 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:34:40 | D | + x - AbsMax +25-08-28 08:34:40 | D | + x = [min=0.1846, max=16.7500] +25-08-28 08:34:40 | D | + w - AbsMax +25-08-28 08:34:40 | D | + w = [min=0.0796, max=0.5117] +25-08-28 08:34:40 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:34:40 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:35:24 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:35:24 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:35:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:35:24 | D | - sum error = [ 6719.3625, 6351.8312, 6037.2431, 5759.9967, 5494.6239] +25-08-28 08:35:24 | D | - best error = [ 6719.3625, 6351.8312, 6037.2431, 5759.9967, 5494.6239] +25-08-28 08:35:24 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:35:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:35:24 | D | - sum error = [ 5264.9287, 5055.1554, 4877.0345, 4738.5171, 4611.1940] +25-08-28 08:35:24 | D | - best error = [ 5264.9287, 5055.1554, 4877.0345, 4738.5171, 4611.1940] +25-08-28 08:35:24 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:35:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:35:24 | D | - sum error = [ 4518.1881, 4455.4602, 4406.4469, 4378.2515, 4369.8808] +25-08-28 08:35:24 | D | - best error = [ 4518.1881, 4455.4602, 4406.4469, 4378.2515, 4369.8808] +25-08-28 08:35:24 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:35:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:35:24 | D | - sum error = [ 4392.3647, 4404.8026, 4475.2774, 4547.5268, 4656.0526] +25-08-28 08:35:24 | D | - best error = [ 4369.8808, 4369.8808, 4369.8808, 4369.8808, 4369.8808] +25-08-28 08:35:24 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:35:24 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:35:24 | D | - sum error = [ 5568.3001, 5303.4666, 5091.1818, 4901.0746, 4709.7902] +25-08-28 08:35:24 | D | - best error = [ 4369.8808, 4369.8808, 4369.8808, 4369.8808, 4369.8808] +25-08-28 08:35:24 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:35:24 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:35:24 | D | - sum error = [ 4575.0075, 4452.8328, 4351.7808, 4286.2968, 4221.4097] +25-08-28 08:35:24 | D | - best error = [ 4369.8808, 4369.8808, 4351.7808, 4286.2968, 4221.4097] +25-08-28 08:35:24 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:35:24 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:35:24 | D | - sum error = [ 4197.2844, 4167.0112, 4182.5789, 4191.3175, 4234.8804] +25-08-28 08:35:24 | D | - best error = [ 4197.2844, 4167.0112, 4167.0112, 4167.0112, 4167.0112] +25-08-28 08:35:24 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:35:24 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:35:24 | D | - sum error = [ 4305.8653, 4367.2119, 4490.4349, 4611.8491] +25-08-28 08:35:24 | D | - best error = [ 4167.0112, 4167.0112, 4167.0112, 4167.0112] +25-08-28 08:35:24 | D | + error = 4167.0112 +25-08-28 08:35:24 | D | + scale = [min=0.7178, max=12.5424] +25-08-28 08:35:24 | D | - transformer_blocks.9.ff_context.down_proj +25-08-28 08:35:24 | D | + w: sint4 +25-08-28 08:35:24 | D | + x: uint4 +25-08-28 08:35:24 | D | + y: None +25-08-28 08:35:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:35:24 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:35:24 | D | + x - AbsMax +25-08-28 08:35:24 | D | + x = [min=0.1719, max=39.2500] +25-08-28 08:35:24 | D | + w - AbsMax +25-08-28 08:35:24 | D | + w = [min=0.0228, max=0.5508] +25-08-28 08:35:24 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:35:25 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:36:13 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:36:13 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:36:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:36:13 | D | - sum error = [ 1816.0721, 1816.9435, 1847.3997, 1947.2330, 2114.8569] +25-08-28 08:36:13 | D | - best error = [ 1816.0721, 1816.0721, 1816.0721, 1816.0721, 1816.0721] +25-08-28 08:36:13 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:36:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:36:13 | D | - sum error = [ 2424.6494, 2765.9076, 3229.3411, 3871.3121, 4537.6060] +25-08-28 08:36:13 | D | - best error = [ 1816.0721, 1816.0721, 1816.0721, 1816.0721, 1816.0721] +25-08-28 08:36:13 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:36:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:36:13 | D | - sum error = [ 5218.4099, 6008.4008, 6854.0948, 7996.3479, 9369.7547] +25-08-28 08:36:13 | D | - best error = [ 1816.0721, 1816.0721, 1816.0721, 1816.0721, 1816.0721] +25-08-28 08:36:13 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:36:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:36:13 | D | - sum error = [10973.1691, 12152.2251, 13231.9129, 14234.9126, 15679.8071] +25-08-28 08:36:13 | D | - best error = [ 1816.0721, 1816.0721, 1816.0721, 1816.0721, 1816.0721] +25-08-28 08:36:13 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:36:13 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:36:13 | D | - sum error = [ 1886.4618, 1856.0028, 1885.0334, 1996.6138, 2148.3486] +25-08-28 08:36:13 | D | - best error = [ 1816.0721, 1816.0721, 1816.0721, 1816.0721, 1816.0721] +25-08-28 08:36:13 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:36:13 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:36:13 | D | - sum error = [ 2269.0681, 2546.4605, 2909.8474, 3405.8291, 4245.8066] +25-08-28 08:36:13 | D | - best error = [ 1816.0721, 1816.0721, 1816.0721, 1816.0721, 1816.0721] +25-08-28 08:36:13 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:36:13 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:36:13 | D | - sum error = [ 5200.2273, 6371.8873, 7699.2524, 9264.9546, 10899.6143] +25-08-28 08:36:13 | D | - best error = [ 1816.0721, 1816.0721, 1816.0721, 1816.0721, 1816.0721] +25-08-28 08:36:13 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:36:13 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:36:13 | D | - sum error = [12178.3953, 13236.1191, 14250.5405, 15695.3405] +25-08-28 08:36:13 | D | - best error = [ 1816.0721, 1816.0721, 1816.0721, 1816.0721] +25-08-28 08:36:13 | D | + error = 1816.0721 +25-08-28 08:36:13 | D | + scale = [min=1.0000, max=1.0000] +25-08-28 08:36:32 | D | - Smoothing Diffusion Block transformer_blocks.10 +25-08-28 08:36:32 | D | - Skipping Module transformer_blocks.10.norm1.linear +25-08-28 08:36:32 | D | - Skipping Module transformer_blocks.10.norm1_context.linear +25-08-28 08:36:32 | D | - Smoothing Transformer Block transformer_blocks.10 +25-08-28 08:36:32 | D | - transformer_blocks.10.attn.qkv_proj +25-08-28 08:36:32 | D | + w: sint4 +25-08-28 08:36:32 | D | + x: sint4 +25-08-28 08:36:32 | D | + y: None +25-08-28 08:36:32 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:36:32 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:36:33 | D | + x - AbsMax +25-08-28 08:36:33 | D | + x = [min=0.0879, max=14.8125] +25-08-28 08:36:33 | D | + w - AbsMax +25-08-28 08:36:33 | D | + w = [min=0.1113, max=0.4766] +25-08-28 08:36:33 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:36:34 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:37:49 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:37:49 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:37:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:37:49 | D | - sum error = [ 6824.8622, 6466.2156, 6243.2277, 6060.8161, 5822.2329] +25-08-28 08:37:49 | D | - best error = [ 6824.8622, 6466.2156, 6243.2277, 6060.8161, 5822.2329] +25-08-28 08:37:49 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:37:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:37:49 | D | - sum error = [ 5666.5905, 5530.6614, 5515.3136, 5484.5988, 5423.4347] +25-08-28 08:37:49 | D | - best error = [ 5666.5905, 5530.6614, 5515.3136, 5484.5988, 5423.4347] +25-08-28 08:37:49 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:37:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:37:49 | D | - sum error = [ 5509.9249, 5480.5730, 5637.4349, 5652.5743, 5862.8935] +25-08-28 08:37:49 | D | - best error = [ 5423.4347, 5423.4347, 5423.4347, 5423.4347, 5423.4347] +25-08-28 08:37:49 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:37:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:37:49 | D | - sum error = [ 5904.7391, 6084.4372, 6343.2109, 6572.9969, 6906.3553] +25-08-28 08:37:49 | D | - best error = [ 5423.4347, 5423.4347, 5423.4347, 5423.4347, 5423.4347] +25-08-28 08:37:49 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:37:49 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:37:49 | D | - sum error = [11171.8743, 10292.9405, 9401.4799, 8788.1252, 8268.3261] +25-08-28 08:37:49 | D | - best error = [ 5423.4347, 5423.4347, 5423.4347, 5423.4347, 5423.4347] +25-08-28 08:37:49 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:37:49 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:37:49 | D | - sum error = [ 7873.4930, 7481.3507, 7092.7106, 6814.2932, 6653.2128] +25-08-28 08:37:49 | D | - best error = [ 5423.4347, 5423.4347, 5423.4347, 5423.4347, 5423.4347] +25-08-28 08:37:49 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:37:49 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:37:49 | D | - sum error = [ 6441.9357, 6379.2413, 6405.8108, 6340.8072, 6362.4093] +25-08-28 08:37:49 | D | - best error = [ 5423.4347, 5423.4347, 5423.4347, 5423.4347, 5423.4347] +25-08-28 08:37:49 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:37:49 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:37:49 | D | - sum error = [ 6390.5173, 6564.9262, 6626.7681, 6926.0580] +25-08-28 08:37:49 | D | - best error = [ 5423.4347, 5423.4347, 5423.4347, 5423.4347] +25-08-28 08:37:49 | D | + error = 5423.4347 +25-08-28 08:37:49 | D | + scale = [min=0.3348, max=3.3634] +25-08-28 08:37:49 | D | - transformer_blocks.10.attn add_qkv_proj +25-08-28 08:37:49 | D | + w: sint4 +25-08-28 08:37:49 | D | + x: sint4 +25-08-28 08:37:49 | D | + y: None +25-08-28 08:37:49 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:37:49 | D | + finished parsing calibration arguments, ram usage: 16.8 +25-08-28 08:37:49 | D | + x - AbsMax +25-08-28 08:37:49 | D | + x = [min=0.2070, max=15.0000] +25-08-28 08:37:49 | D | + w - AbsMax +25-08-28 08:37:49 | D | + w = [min=0.1191, max=0.3770] +25-08-28 08:37:49 | D | + finished resetting calibrator, ram usage: 16.8 +25-08-28 08:37:50 | D | + finished calculating the original outputs, ram usage: 17.0 +25-08-28 08:38:56 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:38:56 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:38:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:38:56 | D | - sum error = [ 2456.4194, 2368.3581, 2309.0520, 2218.9021, 2151.6538] +25-08-28 08:38:56 | D | - best error = [ 2456.4194, 2368.3581, 2309.0520, 2218.9021, 2151.6538] +25-08-28 08:38:56 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:38:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:38:56 | D | - sum error = [ 2098.9263, 2058.2700, 2010.6048, 1970.5937, 1928.8133] +25-08-28 08:38:56 | D | - best error = [ 2098.9263, 2058.2700, 2010.6048, 1970.5937, 1928.8133] +25-08-28 08:38:56 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:38:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:38:56 | D | - sum error = [ 1887.0809, 1856.5537, 1843.0150, 1848.0463, 1874.9918] +25-08-28 08:38:56 | D | - best error = [ 1887.0809, 1856.5537, 1843.0150, 1843.0150, 1843.0150] +25-08-28 08:38:56 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:38:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:38:56 | D | - sum error = [ 1894.9726, 1903.3618, 1900.3227, 1949.1730, 2074.0121] +25-08-28 08:38:56 | D | - best error = [ 1843.0150, 1843.0150, 1843.0150, 1843.0150, 1843.0150] +25-08-28 08:38:56 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:38:56 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:38:56 | D | - sum error = [ 2895.3255, 2759.2688, 2614.1925, 2506.4814, 2417.5334] +25-08-28 08:38:56 | D | - best error = [ 1843.0150, 1843.0150, 1843.0150, 1843.0150, 1843.0150] +25-08-28 08:38:56 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:38:56 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:38:56 | D | - sum error = [ 2304.1679, 2227.8568, 2173.7701, 2126.3981, 2063.2535] +25-08-28 08:38:56 | D | - best error = [ 1843.0150, 1843.0150, 1843.0150, 1843.0150, 1843.0150] +25-08-28 08:38:56 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:38:56 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:38:56 | D | - sum error = [ 1992.9714, 1955.2139, 1968.3940, 1952.0284, 1928.2572] +25-08-28 08:38:56 | D | - best error = [ 1843.0150, 1843.0150, 1843.0150, 1843.0150, 1843.0150] +25-08-28 08:38:56 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:38:56 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:38:56 | D | - sum error = [ 1949.8252, 1931.7998, 1949.7737, 2076.2894] +25-08-28 08:38:56 | D | - best error = [ 1843.0150, 1843.0150, 1843.0150, 1843.0150] +25-08-28 08:38:56 | D | + error = 1843.0150 +25-08-28 08:38:56 | D | + scale = [min=0.3887, max=5.0776] +25-08-28 08:38:56 | D | - transformer_blocks.10.attn.out_proj + transformer_blocks.10.attn.add_out_proj +25-08-28 08:38:56 | D | + w: sint4 +25-08-28 08:38:56 | D | + x: sint4 +25-08-28 08:38:56 | D | + y: None +25-08-28 08:38:56 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:38:56 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:38:57 | D | + x - AbsMax +25-08-28 08:38:57 | D | + x = [min=2.5312, max=18.2500] +25-08-28 08:38:57 | D | + w - AbsMax +25-08-28 08:38:57 | D | + w = [min=0.1235, max=0.2637] +25-08-28 08:38:57 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:38:59 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:40:48 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:40:48 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:40:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:40:48 | D | - sum error = [ 9424.7054, 9365.1793, 9311.1129, 9283.1327, 9248.6142] +25-08-28 08:40:48 | D | - best error = [ 9424.7054, 9365.1793, 9311.1129, 9283.1327, 9248.6142] +25-08-28 08:40:48 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:40:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:40:48 | D | - sum error = [ 9250.6155, 9252.4418, 9295.0847, 9310.2013, 9354.4856] +25-08-28 08:40:48 | D | - best error = [ 9248.6142, 9248.6142, 9248.6142, 9248.6142, 9248.6142] +25-08-28 08:40:48 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:40:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:40:48 | D | - sum error = [ 9444.0131, 9504.8590, 9606.3013, 9692.8254, 9821.4590] +25-08-28 08:40:48 | D | - best error = [ 9248.6142, 9248.6142, 9248.6142, 9248.6142, 9248.6142] +25-08-28 08:40:48 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:40:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:40:48 | D | - sum error = [ 9975.9751, 10078.7995, 10230.9069, 10399.2304, 10566.3670] +25-08-28 08:40:48 | D | - best error = [ 9248.6142, 9248.6142, 9248.6142, 9248.6142, 9248.6142] +25-08-28 08:40:48 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:40:48 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:40:48 | D | - sum error = [10035.6887, 9903.1114, 9814.2619, 9726.0977, 9657.5732] +25-08-28 08:40:48 | D | - best error = [ 9248.6142, 9248.6142, 9248.6142, 9248.6142, 9248.6142] +25-08-28 08:40:48 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:40:48 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:40:48 | D | - sum error = [ 9596.7740, 9569.7720, 9557.4034, 9580.3353, 9619.6956] +25-08-28 08:40:48 | D | - best error = [ 9248.6142, 9248.6142, 9248.6142, 9248.6142, 9248.6142] +25-08-28 08:40:48 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:40:48 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:40:48 | D | - sum error = [ 9649.4501, 9705.5044, 9777.7609, 9870.4119, 9989.7564] +25-08-28 08:40:48 | D | - best error = [ 9248.6142, 9248.6142, 9248.6142, 9248.6142, 9248.6142] +25-08-28 08:40:48 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:40:48 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:40:48 | D | - sum error = [10108.3245, 10240.4656, 10410.9173, 10582.4035] +25-08-28 08:40:48 | D | - best error = [ 9248.6142, 9248.6142, 9248.6142, 9248.6142] +25-08-28 08:40:48 | D | + error = 9248.6142 +25-08-28 08:40:48 | D | + scale = [min=1.2041, max=1.7875] +25-08-28 08:40:48 | D | - transformer_blocks.10.ff.up_proj +25-08-28 08:40:48 | D | + w: sint4 +25-08-28 08:40:48 | D | + x: sint4 +25-08-28 08:40:48 | D | + y: None +25-08-28 08:40:48 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:40:48 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:40:49 | D | + x - AbsMax +25-08-28 08:40:49 | D | + x = [min=0.1318, max=11.8125] +25-08-28 08:40:49 | D | + w - AbsMax +25-08-28 08:40:49 | D | + w = [min=0.1162, max=0.4863] +25-08-28 08:40:49 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:40:50 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:42:06 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:42:06 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:42:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:42:06 | D | - sum error = [13048.7112, 12578.9729, 12167.3826, 11801.2508, 11479.6641] +25-08-28 08:42:06 | D | - best error = [13048.7112, 12578.9729, 12167.3826, 11801.2508, 11479.6641] +25-08-28 08:42:06 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:42:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:42:06 | D | - sum error = [11211.6882, 10987.7386, 10807.2664, 10655.3731, 10557.8718] +25-08-28 08:42:06 | D | - best error = [11211.6882, 10987.7386, 10807.2664, 10655.3731, 10557.8718] +25-08-28 08:42:06 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:42:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:42:06 | D | - sum error = [10479.4320, 10456.3014, 10458.6079, 10516.3998, 10595.2302] +25-08-28 08:42:06 | D | - best error = [10479.4320, 10456.3014, 10456.3014, 10456.3014, 10456.3014] +25-08-28 08:42:06 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:42:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:42:06 | D | - sum error = [10696.3493, 10855.5718, 11062.2533, 11318.2317, 11603.8256] +25-08-28 08:42:06 | D | - best error = [10456.3014, 10456.3014, 10456.3014, 10456.3014, 10456.3014] +25-08-28 08:42:06 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:42:06 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:42:06 | D | - sum error = [14616.0969, 13911.7700, 13291.6617, 12730.4624, 12252.6967] +25-08-28 08:42:06 | D | - best error = [10456.3014, 10456.3014, 10456.3014, 10456.3014, 10456.3014] +25-08-28 08:42:06 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:42:06 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:42:06 | D | - sum error = [11834.1188, 11484.9049, 11182.9738, 10936.4824, 10767.5442] +25-08-28 08:42:06 | D | - best error = [10456.3014, 10456.3014, 10456.3014, 10456.3014, 10456.3014] +25-08-28 08:42:06 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:42:06 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:42:06 | D | - sum error = [10640.2414, 10577.3543, 10552.3965, 10609.1100, 10685.6989] +25-08-28 08:42:06 | D | - best error = [10456.3014, 10456.3014, 10456.3014, 10456.3014, 10456.3014] +25-08-28 08:42:06 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:42:06 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:42:06 | D | - sum error = [10808.5995, 11007.3121, 11259.6639, 11571.5088] +25-08-28 08:42:06 | D | - best error = [10456.3014, 10456.3014, 10456.3014, 10456.3014] +25-08-28 08:42:06 | D | + error = 10456.3014 +25-08-28 08:42:06 | D | + scale = [min=0.3281, max=3.8886] +25-08-28 08:42:06 | D | - transformer_blocks.10.ff.down_proj +25-08-28 08:42:06 | D | + w: sint4 +25-08-28 08:42:06 | D | + x: uint4 +25-08-28 08:42:06 | D | + y: None +25-08-28 08:42:06 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:42:06 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:42:09 | D | + x - AbsMax +25-08-28 08:42:09 | D | + x = [min=0.1719, max=16.7500] +25-08-28 08:42:09 | D | + w - AbsMax +25-08-28 08:42:09 | D | + w = [min=0.0500, max=0.8203] +25-08-28 08:42:09 | D | + finished resetting calibrator, ram usage: 19.2 +25-08-28 08:42:11 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:44:00 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:44:00 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:44:00 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:44:00 | D | - sum error = [ 9208.9374, 9159.8384, 9163.7726, 9159.3912, 9199.2961] +25-08-28 08:44:00 | D | - best error = [ 9208.9374, 9159.8384, 9159.8384, 9159.3912, 9159.3912] +25-08-28 08:44:00 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:44:00 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:44:00 | D | - sum error = [ 9245.9747, 9312.2783, 9441.6143, 9605.4310, 9806.4505] +25-08-28 08:44:00 | D | - best error = [ 9159.3912, 9159.3912, 9159.3912, 9159.3912, 9159.3912] +25-08-28 08:44:00 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:44:00 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:44:00 | D | - sum error = [10169.0405, 10448.9222, 10985.6244, 11586.0813, 12390.3724] +25-08-28 08:44:00 | D | - best error = [ 9159.3912, 9159.3912, 9159.3912, 9159.3912, 9159.3912] +25-08-28 08:44:00 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:44:00 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:44:00 | D | - sum error = [13172.2873, 14117.7092, 15887.1505, 18116.7096, 20846.6032] +25-08-28 08:44:00 | D | - best error = [ 9159.3912, 9159.3912, 9159.3912, 9159.3912, 9159.3912] +25-08-28 08:44:00 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:44:00 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:44:00 | D | - sum error = [ 9997.5445, 9763.1991, 9574.3461, 9446.4884, 9370.1065] +25-08-28 08:44:00 | D | - best error = [ 9159.3912, 9159.3912, 9159.3912, 9159.3912, 9159.3912] +25-08-28 08:44:00 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:44:00 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:44:00 | D | - sum error = [ 9277.7790, 9273.8273, 9302.8692, 9473.7275, 9574.2801] +25-08-28 08:44:00 | D | - best error = [ 9159.3912, 9159.3912, 9159.3912, 9159.3912, 9159.3912] +25-08-28 08:44:00 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:44:00 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:44:00 | D | - sum error = [ 9872.7650, 10279.2264, 10870.4267, 11697.5279, 12575.7634] +25-08-28 08:44:00 | D | - best error = [ 9159.3912, 9159.3912, 9159.3912, 9159.3912, 9159.3912] +25-08-28 08:44:00 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:44:00 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:44:00 | D | - sum error = [13404.5324, 15173.7294, 17629.2257, 20589.3029] +25-08-28 08:44:00 | D | - best error = [ 9159.3912, 9159.3912, 9159.3912, 9159.3912] +25-08-28 08:44:00 | D | + error = 9159.3912 +25-08-28 08:44:00 | D | + scale = [min=0.7679, max=1.5262] +25-08-28 08:44:00 | D | - transformer_blocks.10.ff_context.up_proj +25-08-28 08:44:00 | D | + w: sint4 +25-08-28 08:44:00 | D | + x: sint4 +25-08-28 08:44:00 | D | + y: None +25-08-28 08:44:00 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:44:00 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:44:00 | D | + x - AbsMax +25-08-28 08:44:00 | D | + x = [min=0.1758, max=80.5000] +25-08-28 08:44:00 | D | + w - AbsMax +25-08-28 08:44:00 | D | + w = [min=0.1250, max=0.4512] +25-08-28 08:44:00 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:44:01 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:44:44 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:44:44 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:44:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:44:44 | D | - sum error = [ 8046.6612, 7561.9167, 7167.0476, 6792.5041, 6477.9577] +25-08-28 08:44:44 | D | - best error = [ 8046.6612, 7561.9167, 7167.0476, 6792.5041, 6477.9577] +25-08-28 08:44:44 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:44:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:44:44 | D | - sum error = [ 6165.5803, 5835.8170, 5560.3944, 5286.0598, 5015.8898] +25-08-28 08:44:44 | D | - best error = [ 6165.5803, 5835.8170, 5560.3944, 5286.0598, 5015.8898] +25-08-28 08:44:44 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:44:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:44:44 | D | - sum error = [ 4792.1859, 4594.7901, 4414.0793, 4289.3454, 4188.5193] +25-08-28 08:44:44 | D | - best error = [ 4792.1859, 4594.7901, 4414.0793, 4289.3454, 4188.5193] +25-08-28 08:44:44 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:44:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:44:44 | D | - sum error = [ 4128.6843, 4106.4720, 4109.0885, 4140.9963, 4227.7569] +25-08-28 08:44:44 | D | - best error = [ 4128.6843, 4106.4720, 4106.4720, 4106.4720, 4106.4720] +25-08-28 08:44:44 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:44:44 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:44:44 | D | - sum error = [ 9307.3081, 8465.9119, 7922.0043, 7407.9325, 6975.7801] +25-08-28 08:44:44 | D | - best error = [ 4106.4720, 4106.4720, 4106.4720, 4106.4720, 4106.4720] +25-08-28 08:44:44 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:44:44 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:44:44 | D | - sum error = [ 6580.5723, 6241.8625, 5866.0068, 5506.1309, 5234.3435] +25-08-28 08:44:44 | D | - best error = [ 4106.4720, 4106.4720, 4106.4720, 4106.4720, 4106.4720] +25-08-28 08:44:44 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:44:44 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:44:44 | D | - sum error = [ 4951.0857, 4728.4708, 4527.0591, 4346.7878, 4242.2475] +25-08-28 08:44:44 | D | - best error = [ 4106.4720, 4106.4720, 4106.4720, 4106.4720, 4106.4720] +25-08-28 08:44:44 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:44:44 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:44:44 | D | - sum error = [ 4164.5401, 4126.0688, 4134.8313, 4221.8548] +25-08-28 08:44:44 | D | - best error = [ 4106.4720, 4106.4720, 4106.4720, 4106.4720] +25-08-28 08:44:44 | D | + error = 4106.4720 +25-08-28 08:44:44 | D | + scale = [min=0.2489, max=33.4685] +25-08-28 08:44:45 | D | - transformer_blocks.10.ff_context.down_proj +25-08-28 08:44:45 | D | + w: sint4 +25-08-28 08:44:45 | D | + x: uint4 +25-08-28 08:44:45 | D | + y: None +25-08-28 08:44:45 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:44:45 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:44:45 | D | + x - AbsMax +25-08-28 08:44:45 | D | + x = [min=0.1719, max=46.0000] +25-08-28 08:44:45 | D | + w - AbsMax +25-08-28 08:44:45 | D | + w = [min=0.0233, max=0.7188] +25-08-28 08:44:45 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:44:46 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:45:33 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:45:33 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:45:33 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:45:33 | D | - sum error = [ 4157.8701, 3909.8882, 3778.2270, 3631.5075, 3663.8605] +25-08-28 08:45:33 | D | - best error = [ 4157.8701, 3909.8882, 3778.2270, 3631.5075, 3631.5075] +25-08-28 08:45:33 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:45:33 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:45:33 | D | - sum error = [ 4019.2212, 4764.8058, 5903.0921, 7295.4109, 8756.1832] +25-08-28 08:45:33 | D | - best error = [ 3631.5075, 3631.5075, 3631.5075, 3631.5075, 3631.5075] +25-08-28 08:45:33 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:45:33 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:45:33 | D | - sum error = [ 9910.1330, 11031.7723, 12023.2595, 13094.9896, 13943.8388] +25-08-28 08:45:33 | D | - best error = [ 3631.5075, 3631.5075, 3631.5075, 3631.5075, 3631.5075] +25-08-28 08:45:33 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:45:33 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:45:33 | D | - sum error = [14799.3578, 15453.3253, 16247.5303, 17091.9452, 18508.0800] +25-08-28 08:45:33 | D | - best error = [ 3631.5075, 3631.5075, 3631.5075, 3631.5075, 3631.5075] +25-08-28 08:45:33 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:45:33 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:45:33 | D | - sum error = [ 4780.1845, 4370.5277, 4144.4076, 3996.2549, 3819.0296] +25-08-28 08:45:33 | D | - best error = [ 3631.5075, 3631.5075, 3631.5075, 3631.5075, 3631.5075] +25-08-28 08:45:33 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:45:33 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:45:33 | D | - sum error = [ 3807.7727, 3858.2598, 4149.3101, 4774.8491, 6143.2340] +25-08-28 08:45:33 | D | - best error = [ 3631.5075, 3631.5075, 3631.5075, 3631.5075, 3631.5075] +25-08-28 08:45:33 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:45:33 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:45:33 | D | - sum error = [ 8286.9797, 10508.3677, 12442.2673, 13740.1392, 14704.7290] +25-08-28 08:45:33 | D | - best error = [ 3631.5075, 3631.5075, 3631.5075, 3631.5075, 3631.5075] +25-08-28 08:45:33 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:45:33 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:45:33 | D | - sum error = [15589.1875, 16368.2693, 17361.4635, 18592.1988] +25-08-28 08:45:33 | D | - best error = [ 3631.5075, 3631.5075, 3631.5075, 3631.5075] +25-08-28 08:45:33 | D | + error = 3631.5075 +25-08-28 08:45:33 | D | + scale = [min=0.7679, max=1.7759] +25-08-28 08:45:53 | D | - Smoothing Diffusion Block transformer_blocks.11 +25-08-28 08:45:53 | D | - Skipping Module transformer_blocks.11.norm1.linear +25-08-28 08:45:53 | D | - Skipping Module transformer_blocks.11.norm1_context.linear +25-08-28 08:45:53 | D | - Smoothing Transformer Block transformer_blocks.11 +25-08-28 08:45:53 | D | - transformer_blocks.11.attn.qkv_proj +25-08-28 08:45:53 | D | + w: sint4 +25-08-28 08:45:53 | D | + x: sint4 +25-08-28 08:45:53 | D | + y: None +25-08-28 08:45:53 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:45:53 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:45:53 | D | + x - AbsMax +25-08-28 08:45:53 | D | + x = [min=0.1118, max=16.8750] +25-08-28 08:45:53 | D | + w - AbsMax +25-08-28 08:45:53 | D | + w = [min=0.1089, max=0.5273] +25-08-28 08:45:53 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:45:54 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:47:10 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:47:10 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:47:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:47:10 | D | - sum error = [ 7887.8207, 7600.2405, 7333.9253, 6967.0240, 6816.4357] +25-08-28 08:47:10 | D | - best error = [ 7887.8207, 7600.2405, 7333.9253, 6967.0240, 6816.4357] +25-08-28 08:47:10 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:47:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:47:10 | D | - sum error = [ 6729.9664, 6593.8380, 6604.1320, 6515.6872, 6581.7432] +25-08-28 08:47:10 | D | - best error = [ 6729.9664, 6593.8380, 6593.8380, 6515.6872, 6515.6872] +25-08-28 08:47:10 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:47:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:47:10 | D | - sum error = [ 6634.9330, 6702.7278, 6812.1339, 6873.8912, 6977.8865] +25-08-28 08:47:10 | D | - best error = [ 6515.6872, 6515.6872, 6515.6872, 6515.6872, 6515.6872] +25-08-28 08:47:10 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:47:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:47:10 | D | - sum error = [ 7179.1470, 7306.8332, 7446.7778, 7909.0369, 8236.9218] +25-08-28 08:47:10 | D | - best error = [ 6515.6872, 6515.6872, 6515.6872, 6515.6872, 6515.6872] +25-08-28 08:47:10 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:47:10 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:47:10 | D | - sum error = [14472.0796, 13559.0183, 12320.6052, 11265.6739, 10712.6848] +25-08-28 08:47:10 | D | - best error = [ 6515.6872, 6515.6872, 6515.6872, 6515.6872, 6515.6872] +25-08-28 08:47:10 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:47:10 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:47:10 | D | - sum error = [ 9880.5882, 9386.3024, 8698.4762, 8465.6638, 8200.8673] +25-08-28 08:47:10 | D | - best error = [ 6515.6872, 6515.6872, 6515.6872, 6515.6872, 6515.6872] +25-08-28 08:47:10 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:47:10 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:47:10 | D | - sum error = [ 8047.9317, 7872.3420, 7878.9634, 7738.9927, 7717.4050] +25-08-28 08:47:10 | D | - best error = [ 6515.6872, 6515.6872, 6515.6872, 6515.6872, 6515.6872] +25-08-28 08:47:10 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:47:10 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:47:10 | D | - sum error = [ 7880.4238, 7920.3747, 8085.7235, 8350.4467] +25-08-28 08:47:10 | D | - best error = [ 6515.6872, 6515.6872, 6515.6872, 6515.6872] +25-08-28 08:47:10 | D | + error = 6515.6872 +25-08-28 08:47:10 | D | + scale = [min=0.4163, max=3.0967] +25-08-28 08:47:10 | D | - transformer_blocks.11.attn add_qkv_proj +25-08-28 08:47:10 | D | + w: sint4 +25-08-28 08:47:10 | D | + x: sint4 +25-08-28 08:47:10 | D | + y: None +25-08-28 08:47:10 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:47:10 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:47:10 | D | + x - AbsMax +25-08-28 08:47:10 | D | + x = [min=0.1738, max=29.0000] +25-08-28 08:47:10 | D | + w - AbsMax +25-08-28 08:47:10 | D | + w = [min=0.1099, max=0.7109] +25-08-28 08:47:10 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:47:11 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:48:15 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:48:15 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:48:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:48:15 | D | - sum error = [ 2658.6360, 2578.9225, 2524.7264, 2443.0565, 2348.9157] +25-08-28 08:48:15 | D | - best error = [ 2658.6360, 2578.9225, 2524.7264, 2443.0565, 2348.9157] +25-08-28 08:48:15 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:48:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:48:15 | D | - sum error = [ 2292.3454, 2241.1806, 2154.0743, 2097.1055, 2069.2394] +25-08-28 08:48:15 | D | - best error = [ 2292.3454, 2241.1806, 2154.0743, 2097.1055, 2069.2394] +25-08-28 08:48:15 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:48:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:48:15 | D | - sum error = [ 2030.5226, 2029.5260, 2023.8211, 1993.9299, 2019.7415] +25-08-28 08:48:15 | D | - best error = [ 2030.5226, 2029.5260, 2023.8211, 1993.9299, 1993.9299] +25-08-28 08:48:15 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:48:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:48:15 | D | - sum error = [ 2053.9696, 2127.9807, 2124.5729, 2196.3461, 2321.6112] +25-08-28 08:48:15 | D | - best error = [ 1993.9299, 1993.9299, 1993.9299, 1993.9299, 1993.9299] +25-08-28 08:48:15 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:48:15 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:48:15 | D | - sum error = [ 3653.3943, 3509.0221, 3298.7698, 3089.5875, 2921.3856] +25-08-28 08:48:15 | D | - best error = [ 1993.9299, 1993.9299, 1993.9299, 1993.9299, 1993.9299] +25-08-28 08:48:15 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:48:15 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:48:15 | D | - sum error = [ 2800.3745, 2671.4895, 2581.8949, 2444.2395, 2362.1963] +25-08-28 08:48:15 | D | - best error = [ 1993.9299, 1993.9299, 1993.9299, 1993.9299, 1993.9299] +25-08-28 08:48:15 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:48:15 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:48:15 | D | - sum error = [ 2251.2110, 2155.1273, 2124.6202, 2108.2697, 2115.9911] +25-08-28 08:48:15 | D | - best error = [ 1993.9299, 1993.9299, 1993.9299, 1993.9299, 1993.9299] +25-08-28 08:48:15 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:48:15 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:48:15 | D | - sum error = [ 2122.9480, 2144.6826, 2214.7170, 2310.5986] +25-08-28 08:48:15 | D | - best error = [ 1993.9299, 1993.9299, 1993.9299, 1993.9299] +25-08-28 08:48:15 | D | + error = 1993.9299 +25-08-28 08:48:15 | D | + scale = [min=0.3207, max=8.9240] +25-08-28 08:48:15 | D | - transformer_blocks.11.attn.out_proj + transformer_blocks.11.attn.add_out_proj +25-08-28 08:48:15 | D | + w: sint4 +25-08-28 08:48:15 | D | + x: sint4 +25-08-28 08:48:15 | D | + y: None +25-08-28 08:48:15 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:48:15 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:48:16 | D | + x - AbsMax +25-08-28 08:48:16 | D | + x = [min=2.4062, max=20.0000] +25-08-28 08:48:16 | D | + w - AbsMax +25-08-28 08:48:16 | D | + w = [min=0.1245, max=0.3652] +25-08-28 08:48:16 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:48:17 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:50:06 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:50:06 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:50:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:50:06 | D | - sum error = [ 9783.6239, 9762.2383, 9740.2365, 9731.0045, 9714.1933] +25-08-28 08:50:06 | D | - best error = [ 9783.6239, 9762.2383, 9740.2365, 9731.0045, 9714.1933] +25-08-28 08:50:06 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:50:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:50:06 | D | - sum error = [ 9741.4800, 9741.8544, 9777.1906, 9795.6848, 9878.4298] +25-08-28 08:50:06 | D | - best error = [ 9714.1933, 9714.1933, 9714.1933, 9714.1933, 9714.1933] +25-08-28 08:50:06 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:50:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:50:06 | D | - sum error = [ 9936.6369, 9998.0190, 10076.5089, 10110.2965, 10232.0637] +25-08-28 08:50:06 | D | - best error = [ 9714.1933, 9714.1933, 9714.1933, 9714.1933, 9714.1933] +25-08-28 08:50:06 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:50:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:50:06 | D | - sum error = [10323.8307, 10422.2416, 10545.8879, 10674.3850, 10812.0226] +25-08-28 08:50:06 | D | - best error = [ 9714.1933, 9714.1933, 9714.1933, 9714.1933, 9714.1933] +25-08-28 08:50:06 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:50:06 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:50:06 | D | - sum error = [10935.5563, 10777.9419, 10705.4218, 10586.8908, 10515.9563] +25-08-28 08:50:06 | D | - best error = [ 9714.1933, 9714.1933, 9714.1933, 9714.1933, 9714.1933] +25-08-28 08:50:06 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:50:06 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:50:06 | D | - sum error = [10421.4943, 10369.6854, 10328.4489, 10307.0799, 10307.0446] +25-08-28 08:50:06 | D | - best error = [ 9714.1933, 9714.1933, 9714.1933, 9714.1933, 9714.1933] +25-08-28 08:50:06 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:50:06 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:50:06 | D | - sum error = [10299.5957, 10310.2694, 10337.7638, 10376.5860, 10452.7462] +25-08-28 08:50:06 | D | - best error = [ 9714.1933, 9714.1933, 9714.1933, 9714.1933, 9714.1933] +25-08-28 08:50:06 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:50:06 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:50:06 | D | - sum error = [10509.0540, 10598.4059, 10702.5388, 10803.7649] +25-08-28 08:50:06 | D | - best error = [ 9714.1933, 9714.1933, 9714.1933, 9714.1933] +25-08-28 08:50:06 | D | + error = 9714.1933 +25-08-28 08:50:06 | D | + scale = [min=1.1920, max=1.8206] +25-08-28 08:50:06 | D | - transformer_blocks.11.ff.up_proj +25-08-28 08:50:06 | D | + w: sint4 +25-08-28 08:50:06 | D | + x: sint4 +25-08-28 08:50:06 | D | + y: None +25-08-28 08:50:06 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:50:06 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:50:07 | D | + x - AbsMax +25-08-28 08:50:07 | D | + x = [min=0.1162, max=10.7500] +25-08-28 08:50:07 | D | + w - AbsMax +25-08-28 08:50:07 | D | + w = [min=0.1123, max=0.4980] +25-08-28 08:50:07 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:50:08 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:51:26 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:51:26 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:51:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:51:26 | D | - sum error = [12111.3269, 11758.8326, 11445.3533, 11166.5912, 10937.0738] +25-08-28 08:51:26 | D | - best error = [12111.3269, 11758.8326, 11445.3533, 11166.5912, 10937.0738] +25-08-28 08:51:26 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:51:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:51:26 | D | - sum error = [10733.1490, 10562.1471, 10431.6235, 10331.1815, 10249.9257] +25-08-28 08:51:26 | D | - best error = [10733.1490, 10562.1471, 10431.6235, 10331.1815, 10249.9257] +25-08-28 08:51:26 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:51:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:51:26 | D | - sum error = [10197.8547, 10194.4314, 10200.6314, 10257.7183, 10322.4737] +25-08-28 08:51:26 | D | - best error = [10197.8547, 10194.4314, 10194.4314, 10194.4314, 10194.4314] +25-08-28 08:51:26 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:51:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:51:26 | D | - sum error = [10441.7677, 10572.1687, 10731.3623, 10944.7558, 11184.3441] +25-08-28 08:51:26 | D | - best error = [10194.4314, 10194.4314, 10194.4314, 10194.4314, 10194.4314] +25-08-28 08:51:26 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:51:26 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:51:26 | D | - sum error = [13271.1778, 12763.2009, 12311.6465, 11906.7490, 11542.2563] +25-08-28 08:51:26 | D | - best error = [10194.4314, 10194.4314, 10194.4314, 10194.4314, 10194.4314] +25-08-28 08:51:26 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:51:26 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:51:26 | D | - sum error = [11239.4683, 10976.7649, 10766.0281, 10598.8109, 10468.0909] +25-08-28 08:51:26 | D | - best error = [10194.4314, 10194.4314, 10194.4314, 10194.4314, 10194.4314] +25-08-28 08:51:26 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:51:26 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:51:26 | D | - sum error = [10375.3662, 10345.5668, 10337.1354, 10371.1504, 10432.3497] +25-08-28 08:51:26 | D | - best error = [10194.4314, 10194.4314, 10194.4314, 10194.4314, 10194.4314] +25-08-28 08:51:26 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:51:26 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:51:26 | D | - sum error = [10551.2259, 10704.4185, 10907.4617, 11170.5756] +25-08-28 08:51:26 | D | - best error = [10194.4314, 10194.4314, 10194.4314, 10194.4314] +25-08-28 08:51:26 | D | + error = 10194.4314 +25-08-28 08:51:26 | D | + scale = [min=0.3061, max=3.6921] +25-08-28 08:51:27 | D | - transformer_blocks.11.ff.down_proj +25-08-28 08:51:27 | D | + w: sint4 +25-08-28 08:51:27 | D | + x: uint4 +25-08-28 08:51:27 | D | + y: None +25-08-28 08:51:27 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:51:27 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:51:29 | D | + x - AbsMax +25-08-28 08:51:29 | D | + x = [min=0.1719, max=22.2500] +25-08-28 08:51:29 | D | + w - AbsMax +25-08-28 08:51:29 | D | + w = [min=0.0532, max=0.6953] +25-08-28 08:51:29 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:51:31 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:53:25 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:53:25 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:53:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:53:25 | D | - sum error = [ 9084.5803, 9099.0158, 9085.2804, 9141.7674, 9162.5630] +25-08-28 08:53:25 | D | - best error = [ 9084.5803, 9084.5803, 9084.5803, 9084.5803, 9084.5803] +25-08-28 08:53:25 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:53:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:53:25 | D | - sum error = [ 9218.1078, 9301.0821, 9413.4461, 9567.1505, 9725.4207] +25-08-28 08:53:25 | D | - best error = [ 9084.5803, 9084.5803, 9084.5803, 9084.5803, 9084.5803] +25-08-28 08:53:25 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:53:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:53:25 | D | - sum error = [ 9880.6397, 10099.1489, 10385.9913, 10774.6859, 11247.8368] +25-08-28 08:53:25 | D | - best error = [ 9084.5803, 9084.5803, 9084.5803, 9084.5803, 9084.5803] +25-08-28 08:53:25 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:53:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:53:25 | D | - sum error = [11847.6907, 12544.7704, 13863.1079, 15496.6340, 17437.7953] +25-08-28 08:53:25 | D | - best error = [ 9084.5803, 9084.5803, 9084.5803, 9084.5803, 9084.5803] +25-08-28 08:53:25 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:53:25 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:53:25 | D | - sum error = [ 9735.9569, 9567.9795, 9444.1910, 9288.2158, 9223.1253] +25-08-28 08:53:25 | D | - best error = [ 9084.5803, 9084.5803, 9084.5803, 9084.5803, 9084.5803] +25-08-28 08:53:25 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:53:25 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:53:25 | D | - sum error = [ 9147.9611, 9158.2553, 9166.4370, 9211.0681, 9357.9476] +25-08-28 08:53:25 | D | - best error = [ 9084.5803, 9084.5803, 9084.5803, 9084.5803, 9084.5803] +25-08-28 08:53:25 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:53:25 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:53:25 | D | - sum error = [ 9514.1263, 9766.0145, 10150.0037, 10586.4109, 11190.0206] +25-08-28 08:53:25 | D | - best error = [ 9084.5803, 9084.5803, 9084.5803, 9084.5803, 9084.5803] +25-08-28 08:53:25 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:53:25 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:53:25 | D | - sum error = [11965.4162, 13319.0961, 15119.1407, 17261.3650] +25-08-28 08:53:25 | D | - best error = [ 9084.5803, 9084.5803, 9084.5803, 9084.5803] +25-08-28 08:53:25 | D | + error = 9084.5803 +25-08-28 08:53:25 | D | + scale = [min=1.0000, max=1.0000] +25-08-28 08:53:25 | D | - transformer_blocks.11.ff_context.up_proj +25-08-28 08:53:25 | D | + w: sint4 +25-08-28 08:53:25 | D | + x: sint4 +25-08-28 08:53:25 | D | + y: None +25-08-28 08:53:25 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:53:25 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:53:25 | D | + x - AbsMax +25-08-28 08:53:25 | D | + x = [min=0.2227, max=65.0000] +25-08-28 08:53:25 | D | + w - AbsMax +25-08-28 08:53:25 | D | + w = [min=0.0928, max=0.5820] +25-08-28 08:53:25 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:53:26 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:54:09 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:54:09 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:54:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:54:09 | D | - sum error = [ 9186.0364, 8753.6744, 8356.2197, 7962.6749, 7573.7259] +25-08-28 08:54:09 | D | - best error = [ 9186.0364, 8753.6744, 8356.2197, 7962.6749, 7573.7259] +25-08-28 08:54:09 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:54:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:54:09 | D | - sum error = [ 7231.1996, 6871.9232, 6546.1253, 6255.6282, 5980.5077] +25-08-28 08:54:09 | D | - best error = [ 7231.1996, 6871.9232, 6546.1253, 6255.6282, 5980.5077] +25-08-28 08:54:09 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:54:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:54:09 | D | - sum error = [ 5781.2941, 5686.2568, 5621.4747, 5624.3154, 5580.9089] +25-08-28 08:54:09 | D | - best error = [ 5781.2941, 5686.2568, 5621.4747, 5621.4747, 5580.9089] +25-08-28 08:54:09 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:54:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:54:09 | D | - sum error = [ 5622.7998, 5674.6045, 5673.0778, 5835.5689, 5991.6896] +25-08-28 08:54:09 | D | - best error = [ 5580.9089, 5580.9089, 5580.9089, 5580.9089, 5580.9089] +25-08-28 08:54:09 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:54:09 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:54:09 | D | - sum error = [ 8231.6054, 7760.2876, 7355.9048, 6971.8333, 6642.4724] +25-08-28 08:54:09 | D | - best error = [ 5580.9089, 5580.9089, 5580.9089, 5580.9089, 5580.9089] +25-08-28 08:54:09 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:54:09 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:54:09 | D | - sum error = [ 6349.9506, 6089.7389, 5919.0905, 5795.1428, 5703.0578] +25-08-28 08:54:09 | D | - best error = [ 5580.9089, 5580.9089, 5580.9089, 5580.9089, 5580.9089] +25-08-28 08:54:09 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:54:09 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:54:09 | D | - sum error = [ 5599.0837, 5511.1678, 5480.5401, 5485.5106, 5523.9312] +25-08-28 08:54:09 | D | - best error = [ 5580.9089, 5511.1678, 5480.5401, 5480.5401, 5480.5401] +25-08-28 08:54:09 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:54:09 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:54:09 | D | - sum error = [ 5612.7054, 5683.1099, 5795.9427, 5971.9826] +25-08-28 08:54:09 | D | - best error = [ 5480.5401, 5480.5401, 5480.5401, 5480.5401] +25-08-28 08:54:09 | D | + error = 5480.5401 +25-08-28 08:54:09 | D | + scale = [min=0.6829, max=32.0300] +25-08-28 08:54:09 | D | - transformer_blocks.11.ff_context.down_proj +25-08-28 08:54:09 | D | + w: sint4 +25-08-28 08:54:09 | D | + x: uint4 +25-08-28 08:54:09 | D | + y: None +25-08-28 08:54:09 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:54:09 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 08:54:10 | D | + x - AbsMax +25-08-28 08:54:10 | D | + x = [min=0.1719, max=98.5000] +25-08-28 08:54:10 | D | + w - AbsMax +25-08-28 08:54:10 | D | + w = [min=0.0199, max=0.4707] +25-08-28 08:54:10 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 08:54:10 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 08:54:58 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:54:58 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:54:58 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:54:58 | D | - sum error = [ 2364.7107, 2309.2039, 2313.9648, 2413.0003, 2618.3075] +25-08-28 08:54:58 | D | - best error = [ 2364.7107, 2309.2039, 2309.2039, 2309.2039, 2309.2039] +25-08-28 08:54:58 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:54:58 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:54:58 | D | - sum error = [ 3033.3955, 3618.8368, 4280.7658, 5015.1435, 5899.7959] +25-08-28 08:54:58 | D | - best error = [ 2309.2039, 2309.2039, 2309.2039, 2309.2039, 2309.2039] +25-08-28 08:54:58 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:54:58 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:54:58 | D | - sum error = [ 6714.6360, 7677.8424, 9001.0466, 10896.1740, 12776.7464] +25-08-28 08:54:58 | D | - best error = [ 2309.2039, 2309.2039, 2309.2039, 2309.2039, 2309.2039] +25-08-28 08:54:58 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:54:58 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:54:58 | D | - sum error = [14139.0863, 15438.5460, 16543.8182, 18346.6889, 21277.4216] +25-08-28 08:54:58 | D | - best error = [ 2309.2039, 2309.2039, 2309.2039, 2309.2039, 2309.2039] +25-08-28 08:54:58 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:54:58 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:54:58 | D | - sum error = [ 2356.7131, 2277.2576, 2279.4735, 2349.1114, 2494.8164] +25-08-28 08:54:58 | D | - best error = [ 2309.2039, 2277.2576, 2277.2576, 2277.2576, 2277.2576] +25-08-28 08:54:58 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:54:58 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:54:58 | D | - sum error = [ 2729.3351, 3038.7989, 3596.7649, 4466.2409, 5638.8359] +25-08-28 08:54:58 | D | - best error = [ 2277.2576, 2277.2576, 2277.2576, 2277.2576, 2277.2576] +25-08-28 08:54:58 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:54:58 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:54:58 | D | - sum error = [ 6976.9988, 8785.0314, 10890.5284, 12677.4742, 14343.4953] +25-08-28 08:54:58 | D | - best error = [ 2277.2576, 2277.2576, 2277.2576, 2277.2576, 2277.2576] +25-08-28 08:54:58 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:54:58 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:54:58 | D | - sum error = [15491.9200, 16736.5526, 18472.2962, 21407.9268] +25-08-28 08:54:58 | D | - best error = [ 2277.2576, 2277.2576, 2277.2576, 2277.2576] +25-08-28 08:54:58 | D | + error = 2277.2576 +25-08-28 08:54:58 | D | + scale = [min=1.9195, max=28.4842] +25-08-28 08:55:18 | D | - Smoothing Diffusion Block transformer_blocks.12 +25-08-28 08:55:18 | D | - Skipping Module transformer_blocks.12.norm1.linear +25-08-28 08:55:18 | D | - Skipping Module transformer_blocks.12.norm1_context.linear +25-08-28 08:55:18 | D | - Smoothing Transformer Block transformer_blocks.12 +25-08-28 08:55:18 | D | - transformer_blocks.12.attn.qkv_proj +25-08-28 08:55:18 | D | + w: sint4 +25-08-28 08:55:18 | D | + x: sint4 +25-08-28 08:55:18 | D | + y: None +25-08-28 08:55:18 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:55:18 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 08:55:18 | D | + x - AbsMax +25-08-28 08:55:18 | D | + x = [min=0.0898, max=13.3125] +25-08-28 08:55:18 | D | + w - AbsMax +25-08-28 08:55:18 | D | + w = [min=0.1118, max=0.4609] +25-08-28 08:55:18 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 08:55:20 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 08:56:35 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:56:35 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:56:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:56:35 | D | - sum error = [ 8565.1377, 8266.5277, 7948.1638, 7685.3127, 7481.6554] +25-08-28 08:56:35 | D | - best error = [ 8565.1377, 8266.5277, 7948.1638, 7685.3127, 7481.6554] +25-08-28 08:56:35 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:56:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:56:35 | D | - sum error = [ 7385.3374, 7287.4432, 7172.9628, 7224.7619, 7146.5533] +25-08-28 08:56:35 | D | - best error = [ 7385.3374, 7287.4432, 7172.9628, 7172.9628, 7146.5533] +25-08-28 08:56:35 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:56:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:56:35 | D | - sum error = [ 7184.6200, 7188.3723, 7187.9001, 7257.0209, 7453.0166] +25-08-28 08:56:35 | D | - best error = [ 7146.5533, 7146.5533, 7146.5533, 7146.5533, 7146.5533] +25-08-28 08:56:35 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:56:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:56:35 | D | - sum error = [ 7666.8940, 7841.0038, 8078.3870, 8385.2714, 8691.5218] +25-08-28 08:56:35 | D | - best error = [ 7146.5533, 7146.5533, 7146.5533, 7146.5533, 7146.5533] +25-08-28 08:56:35 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:56:35 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:56:35 | D | - sum error = [13964.2824, 12753.2415, 11800.4081, 11118.8417, 10521.7926] +25-08-28 08:56:35 | D | - best error = [ 7146.5533, 7146.5533, 7146.5533, 7146.5533, 7146.5533] +25-08-28 08:56:35 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:56:35 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:56:35 | D | - sum error = [ 9875.6610, 9497.5736, 8985.1713, 8780.1813, 8515.1846] +25-08-28 08:56:35 | D | - best error = [ 7146.5533, 7146.5533, 7146.5533, 7146.5533, 7146.5533] +25-08-28 08:56:35 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:56:35 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:56:35 | D | - sum error = [ 8401.7489, 8162.2894, 8035.9823, 8045.5956, 8134.9173] +25-08-28 08:56:35 | D | - best error = [ 7146.5533, 7146.5533, 7146.5533, 7146.5533, 7146.5533] +25-08-28 08:56:35 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:56:35 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:56:35 | D | - sum error = [ 8166.3818, 8271.8694, 8506.3440, 8799.6354] +25-08-28 08:56:35 | D | - best error = [ 7146.5533, 7146.5533, 7146.5533, 7146.5533] +25-08-28 08:56:35 | D | + error = 7146.5533 +25-08-28 08:56:35 | D | + scale = [min=0.3381, max=3.2057] +25-08-28 08:56:35 | D | - transformer_blocks.12.attn add_qkv_proj +25-08-28 08:56:35 | D | + w: sint4 +25-08-28 08:56:35 | D | + x: sint4 +25-08-28 08:56:35 | D | + y: None +25-08-28 08:56:35 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:56:35 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 08:56:35 | D | + x - AbsMax +25-08-28 08:56:35 | D | + x = [min=0.1514, max=29.3750] +25-08-28 08:56:35 | D | + w - AbsMax +25-08-28 08:56:35 | D | + w = [min=0.1309, max=0.5391] +25-08-28 08:56:35 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 08:56:36 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 08:57:41 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:57:41 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:57:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:57:41 | D | - sum error = [ 2010.2800, 1965.6575, 1924.1742, 1879.5918, 1867.2165] +25-08-28 08:57:41 | D | - best error = [ 2010.2800, 1965.6575, 1924.1742, 1879.5918, 1867.2165] +25-08-28 08:57:41 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:57:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:57:41 | D | - sum error = [ 1807.0760, 1781.9369, 1785.8280, 1776.0269, 1752.0721] +25-08-28 08:57:41 | D | - best error = [ 1807.0760, 1781.9369, 1781.9369, 1776.0269, 1752.0721] +25-08-28 08:57:41 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:57:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:57:41 | D | - sum error = [ 1769.3900, 1792.6088, 1779.1239, 1817.3665, 1817.5442] +25-08-28 08:57:41 | D | - best error = [ 1752.0721, 1752.0721, 1752.0721, 1752.0721, 1752.0721] +25-08-28 08:57:41 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:57:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:57:41 | D | - sum error = [ 1877.5293, 1925.7954, 1984.5835, 2082.6680, 2199.5318] +25-08-28 08:57:41 | D | - best error = [ 1752.0721, 1752.0721, 1752.0721, 1752.0721, 1752.0721] +25-08-28 08:57:41 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:57:41 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:57:41 | D | - sum error = [ 2929.1106, 2781.3954, 2670.7018, 2510.1840, 2423.9085] +25-08-28 08:57:41 | D | - best error = [ 1752.0721, 1752.0721, 1752.0721, 1752.0721, 1752.0721] +25-08-28 08:57:41 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:57:41 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:57:41 | D | - sum error = [ 2341.3648, 2260.4813, 2213.7028, 2120.1218, 2060.5121] +25-08-28 08:57:41 | D | - best error = [ 1752.0721, 1752.0721, 1752.0721, 1752.0721, 1752.0721] +25-08-28 08:57:41 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:57:41 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:57:41 | D | - sum error = [ 2006.4047, 1980.0925, 1955.4550, 1921.8760, 1980.1091] +25-08-28 08:57:41 | D | - best error = [ 1752.0721, 1752.0721, 1752.0721, 1752.0721, 1752.0721] +25-08-28 08:57:41 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:57:41 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:57:41 | D | - sum error = [ 2001.3191, 2017.4956, 2114.6017, 2184.3132] +25-08-28 08:57:41 | D | - best error = [ 1752.0721, 1752.0721, 1752.0721, 1752.0721] +25-08-28 08:57:41 | D | + error = 1752.0721 +25-08-28 08:57:41 | D | + scale = [min=0.4276, max=4.5771] +25-08-28 08:57:41 | D | - transformer_blocks.12.attn.out_proj + transformer_blocks.12.attn.add_out_proj +25-08-28 08:57:41 | D | + w: sint4 +25-08-28 08:57:41 | D | + x: sint4 +25-08-28 08:57:41 | D | + y: None +25-08-28 08:57:41 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:57:41 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 08:57:42 | D | + x - AbsMax +25-08-28 08:57:42 | D | + x = [min=3.8906, max=28.7500] +25-08-28 08:57:42 | D | + w - AbsMax +25-08-28 08:57:42 | D | + w = [min=0.1279, max=0.3242] +25-08-28 08:57:42 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 08:57:43 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 08:59:31 | D | - x / w range = AbsMax / AbsMax +25-08-28 08:59:31 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 08:59:31 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:59:31 | D | - sum error = [10645.9938, 10592.5411, 10580.6748, 10553.4741, 10571.8649] +25-08-28 08:59:31 | D | - best error = [10645.9938, 10592.5411, 10580.6748, 10553.4741, 10553.4741] +25-08-28 08:59:31 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 08:59:31 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:59:31 | D | - sum error = [10553.7345, 10575.1448, 10623.4917, 10676.0135, 10737.0964] +25-08-28 08:59:31 | D | - best error = [10553.4741, 10553.4741, 10553.4741, 10553.4741, 10553.4741] +25-08-28 08:59:31 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 08:59:31 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:59:31 | D | - sum error = [10808.4585, 10883.8456, 10974.2637, 11093.4337, 11198.7299] +25-08-28 08:59:31 | D | - best error = [10553.4741, 10553.4741, 10553.4741, 10553.4741, 10553.4741] +25-08-28 08:59:31 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:59:31 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 08:59:31 | D | - sum error = [11337.9935, 11474.1734, 11635.7326, 11808.7899, 11999.4780] +25-08-28 08:59:31 | D | - best error = [10553.4741, 10553.4741, 10553.4741, 10553.4741, 10553.4741] +25-08-28 08:59:31 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 08:59:31 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 08:59:31 | D | - sum error = [11372.9869, 11270.9638, 11160.5907, 11098.7570, 11067.0390] +25-08-28 08:59:31 | D | - best error = [10553.4741, 10553.4741, 10553.4741, 10553.4741, 10553.4741] +25-08-28 08:59:31 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 08:59:31 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 08:59:31 | D | - sum error = [10998.7377, 10975.8819, 10981.9630, 11010.2065, 11025.2837] +25-08-28 08:59:31 | D | - best error = [10553.4741, 10553.4741, 10553.4741, 10553.4741, 10553.4741] +25-08-28 08:59:31 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 08:59:31 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 08:59:31 | D | - sum error = [11057.6992, 11124.3610, 11207.7816, 11322.6930, 11405.6877] +25-08-28 08:59:31 | D | - best error = [10553.4741, 10553.4741, 10553.4741, 10553.4741, 10553.4741] +25-08-28 08:59:31 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 08:59:31 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 08:59:31 | D | - sum error = [11528.0586, 11689.0780, 11821.7278, 12010.0864] +25-08-28 08:59:31 | D | - best error = [10553.4741, 10553.4741, 10553.4741, 10553.4741] +25-08-28 08:59:31 | D | + error = 10553.4741 +25-08-28 08:59:31 | D | + scale = [min=1.2260, max=1.6550] +25-08-28 08:59:31 | D | - transformer_blocks.12.ff.up_proj +25-08-28 08:59:31 | D | + w: sint4 +25-08-28 08:59:31 | D | + x: sint4 +25-08-28 08:59:31 | D | + y: None +25-08-28 08:59:31 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 08:59:31 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 08:59:32 | D | + x - AbsMax +25-08-28 08:59:32 | D | + x = [min=0.1455, max=8.0000] +25-08-28 08:59:32 | D | + w - AbsMax +25-08-28 08:59:32 | D | + w = [min=0.0933, max=0.5938] +25-08-28 08:59:32 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 08:59:33 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 09:00:49 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:00:49 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:00:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:00:49 | D | - sum error = [12381.2064, 12029.9486, 11703.1961, 11409.0021, 11166.9516] +25-08-28 09:00:49 | D | - best error = [12381.2064, 12029.9486, 11703.1961, 11409.0021, 11166.9516] +25-08-28 09:00:49 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:00:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:00:49 | D | - sum error = [10958.6769, 10791.4896, 10636.2268, 10534.2299, 10455.0971] +25-08-28 09:00:49 | D | - best error = [10958.6769, 10791.4896, 10636.2268, 10534.2299, 10455.0971] +25-08-28 09:00:49 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:00:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:00:49 | D | - sum error = [10414.7726, 10404.7628, 10412.9646, 10471.3365, 10544.0577] +25-08-28 09:00:49 | D | - best error = [10414.7726, 10404.7628, 10404.7628, 10404.7628, 10404.7628] +25-08-28 09:00:49 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:00:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:00:49 | D | - sum error = [10662.3772, 10808.2213, 10988.6727, 11209.1109, 11464.6197] +25-08-28 09:00:49 | D | - best error = [10404.7628, 10404.7628, 10404.7628, 10404.7628, 10404.7628] +25-08-28 09:00:49 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:00:49 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:00:49 | D | - sum error = [13119.7445, 12646.5704, 12224.6584, 11851.4742, 11521.6103] +25-08-28 09:00:49 | D | - best error = [10404.7628, 10404.7628, 10404.7628, 10404.7628, 10404.7628] +25-08-28 09:00:49 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:00:49 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:00:49 | D | - sum error = [11249.9671, 11018.1363, 10833.2809, 10686.6983, 10584.6738] +25-08-28 09:00:49 | D | - best error = [10404.7628, 10404.7628, 10404.7628, 10404.7628, 10404.7628] +25-08-28 09:00:49 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:00:49 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:00:49 | D | - sum error = [10518.4006, 10497.9419, 10511.9475, 10564.9852, 10666.3872] +25-08-28 09:00:49 | D | - best error = [10404.7628, 10404.7628, 10404.7628, 10404.7628, 10404.7628] +25-08-28 09:00:49 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:00:49 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:00:49 | D | - sum error = [10805.0914, 10971.3241, 11183.7364, 11450.2339] +25-08-28 09:00:49 | D | - best error = [10404.7628, 10404.7628, 10404.7628, 10404.7628] +25-08-28 09:00:49 | D | + error = 10404.7628 +25-08-28 09:00:49 | D | + scale = [min=0.3464, max=3.1383] +25-08-28 09:00:49 | D | - transformer_blocks.12.ff.down_proj +25-08-28 09:00:49 | D | + w: sint4 +25-08-28 09:00:49 | D | + x: uint4 +25-08-28 09:00:49 | D | + y: None +25-08-28 09:00:49 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:00:49 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 09:00:52 | D | + x - AbsMax +25-08-28 09:00:52 | D | + x = [min=0.1719, max=26.6250] +25-08-28 09:00:52 | D | + w - AbsMax +25-08-28 09:00:52 | D | + w = [min=0.0469, max=0.8320] +25-08-28 09:00:52 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 09:00:54 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 09:02:44 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:02:44 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:02:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:02:44 | D | - sum error = [ 9049.0225, 9020.0887, 9106.5150, 9105.7314, 9138.9833] +25-08-28 09:02:44 | D | - best error = [ 9049.0225, 9020.0887, 9020.0887, 9020.0887, 9020.0887] +25-08-28 09:02:44 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:02:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:02:44 | D | - sum error = [ 9183.6803, 9302.2789, 9396.6420, 9517.9763, 9726.0171] +25-08-28 09:02:44 | D | - best error = [ 9020.0887, 9020.0887, 9020.0887, 9020.0887, 9020.0887] +25-08-28 09:02:44 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:02:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:02:44 | D | - sum error = [ 9919.3564, 10113.9693, 10374.1741, 10660.1696, 11005.9980] +25-08-28 09:02:44 | D | - best error = [ 9020.0887, 9020.0887, 9020.0887, 9020.0887, 9020.0887] +25-08-28 09:02:44 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:02:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:02:44 | D | - sum error = [11494.5859, 12210.1767, 13191.9478, 14363.4659, 15939.3922] +25-08-28 09:02:44 | D | - best error = [ 9020.0887, 9020.0887, 9020.0887, 9020.0887, 9020.0887] +25-08-28 09:02:44 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:02:44 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:02:44 | D | - sum error = [ 9695.7595, 9511.5963, 9376.4680, 9266.4151, 9188.5067] +25-08-28 09:02:44 | D | - best error = [ 9020.0887, 9020.0887, 9020.0887, 9020.0887, 9020.0887] +25-08-28 09:02:44 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:02:44 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:02:44 | D | - sum error = [ 9184.3854, 9120.5013, 9147.5221, 9253.5602, 9327.5017] +25-08-28 09:02:44 | D | - best error = [ 9020.0887, 9020.0887, 9020.0887, 9020.0887, 9020.0887] +25-08-28 09:02:44 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:02:44 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:02:44 | D | - sum error = [ 9523.7997, 9752.3146, 10099.9497, 10510.1783, 10874.0083] +25-08-28 09:02:44 | D | - best error = [ 9020.0887, 9020.0887, 9020.0887, 9020.0887, 9020.0887] +25-08-28 09:02:44 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:02:44 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:02:44 | D | - sum error = [11571.1642, 12669.5275, 14005.6166, 15735.1245] +25-08-28 09:02:44 | D | - best error = [ 9020.0887, 9020.0887, 9020.0887, 9020.0887] +25-08-28 09:02:44 | D | + error = 9020.0887 +25-08-28 09:02:44 | D | + scale = [min=0.9157, max=1.1783] +25-08-28 09:02:44 | D | - transformer_blocks.12.ff_context.up_proj +25-08-28 09:02:44 | D | + w: sint4 +25-08-28 09:02:44 | D | + x: sint4 +25-08-28 09:02:44 | D | + y: None +25-08-28 09:02:44 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:02:44 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 09:02:44 | D | + x - AbsMax +25-08-28 09:02:44 | D | + x = [min=0.1338, max=99.0000] +25-08-28 09:02:44 | D | + w - AbsMax +25-08-28 09:02:44 | D | + w = [min=0.1094, max=0.4102] +25-08-28 09:02:44 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 09:02:45 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 09:03:28 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:03:28 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:03:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:03:28 | D | - sum error = [ 7462.3128, 6417.9688, 5623.3979, 4988.9368, 4505.2694] +25-08-28 09:03:28 | D | - best error = [ 7462.3128, 6417.9688, 5623.3979, 4988.9368, 4505.2694] +25-08-28 09:03:28 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:03:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:03:28 | D | - sum error = [ 4078.1023, 3770.2235, 3510.8426, 3258.9483, 3073.2960] +25-08-28 09:03:28 | D | - best error = [ 4078.1023, 3770.2235, 3510.8426, 3258.9483, 3073.2960] +25-08-28 09:03:28 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:03:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:03:28 | D | - sum error = [ 2884.6347, 2715.3956, 2580.7741, 2475.3393, 2395.1348] +25-08-28 09:03:28 | D | - best error = [ 2884.6347, 2715.3956, 2580.7741, 2475.3393, 2395.1348] +25-08-28 09:03:28 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:03:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:03:28 | D | - sum error = [ 2333.6047, 2333.8596, 2373.9218, 2432.8483, 2544.0360] +25-08-28 09:03:28 | D | - best error = [ 2333.6047, 2333.6047, 2333.6047, 2333.6047, 2333.6047] +25-08-28 09:03:28 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:03:28 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:03:28 | D | - sum error = [ 8671.8778, 7221.6178, 6298.3188, 5510.3401, 4905.2924] +25-08-28 09:03:28 | D | - best error = [ 2333.6047, 2333.6047, 2333.6047, 2333.6047, 2333.6047] +25-08-28 09:03:28 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:03:28 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:03:28 | D | - sum error = [ 4386.9563, 4020.3996, 3667.6618, 3408.2638, 3154.2570] +25-08-28 09:03:28 | D | - best error = [ 2333.6047, 2333.6047, 2333.6047, 2333.6047, 2333.6047] +25-08-28 09:03:28 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:03:28 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:03:28 | D | - sum error = [ 2951.6199, 2751.6849, 2592.9849, 2472.9979, 2393.6772] +25-08-28 09:03:28 | D | - best error = [ 2333.6047, 2333.6047, 2333.6047, 2333.6047, 2333.6047] +25-08-28 09:03:28 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:03:28 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:03:28 | D | - sum error = [ 2356.8237, 2359.0621, 2415.0945, 2540.8699] +25-08-28 09:03:28 | D | - best error = [ 2333.6047, 2333.6047, 2333.6047, 2333.6047] +25-08-28 09:03:28 | D | + error = 2333.6047 +25-08-28 09:03:28 | D | + scale = [min=0.2212, max=31.3853] +25-08-28 09:03:28 | D | - transformer_blocks.12.ff_context.down_proj +25-08-28 09:03:28 | D | + w: sint4 +25-08-28 09:03:28 | D | + x: uint4 +25-08-28 09:03:28 | D | + y: None +25-08-28 09:03:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:03:28 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 09:03:28 | D | + x - AbsMax +25-08-28 09:03:28 | D | + x = [min=0.1719, max=40.7500] +25-08-28 09:03:28 | D | + w - AbsMax +25-08-28 09:03:28 | D | + w = [min=0.0288, max=0.5234] +25-08-28 09:03:28 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 09:03:29 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 09:04:18 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:04:18 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:04:18 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:04:18 | D | - sum error = [ 3360.1995, 2961.9720, 2647.4712, 2397.0142, 2238.3150] +25-08-28 09:04:18 | D | - best error = [ 3360.1995, 2961.9720, 2647.4712, 2397.0142, 2238.3150] +25-08-28 09:04:18 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:04:18 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:04:18 | D | - sum error = [ 2179.6538, 2244.4897, 2375.1013, 2759.9922, 3489.3772] +25-08-28 09:04:18 | D | - best error = [ 2179.6538, 2179.6538, 2179.6538, 2179.6538, 2179.6538] +25-08-28 09:04:18 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:04:18 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:04:18 | D | - sum error = [ 5138.1446, 8210.9235, 12915.5150, 19319.4518, 27136.2017] +25-08-28 09:04:18 | D | - best error = [ 2179.6538, 2179.6538, 2179.6538, 2179.6538, 2179.6538] +25-08-28 09:04:18 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:04:18 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:04:18 | D | - sum error = [36519.6469, 46835.1786, 56448.2313, 65455.6421, 71459.3389] +25-08-28 09:04:18 | D | - best error = [ 2179.6538, 2179.6538, 2179.6538, 2179.6538, 2179.6538] +25-08-28 09:04:18 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:04:18 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:04:18 | D | - sum error = [ 4446.2114, 3791.6412, 3375.9756, 3018.3925, 2649.3896] +25-08-28 09:04:18 | D | - best error = [ 2179.6538, 2179.6538, 2179.6538, 2179.6538, 2179.6538] +25-08-28 09:04:18 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:04:18 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:04:18 | D | - sum error = [ 2411.9397, 2336.7764, 2337.7799, 2444.2776, 2776.3757] +25-08-28 09:04:18 | D | - best error = [ 2179.6538, 2179.6538, 2179.6538, 2179.6538, 2179.6538] +25-08-28 09:04:18 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:04:18 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:04:18 | D | - sum error = [ 3498.9783, 5418.0015, 9564.9882, 16744.8514, 26026.1444] +25-08-28 09:04:18 | D | - best error = [ 2179.6538, 2179.6538, 2179.6538, 2179.6538, 2179.6538] +25-08-28 09:04:18 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:04:18 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:04:18 | D | - sum error = [36820.3544, 49015.2316, 60199.6635, 70411.9044] +25-08-28 09:04:18 | D | - best error = [ 2179.6538, 2179.6538, 2179.6538, 2179.6538] +25-08-28 09:04:18 | D | + error = 2179.6538 +25-08-28 09:04:18 | D | + scale = [min=0.6439, max=2.5266] +25-08-28 09:04:38 | D | - Smoothing Diffusion Block transformer_blocks.13 +25-08-28 09:04:38 | D | - Skipping Module transformer_blocks.13.norm1.linear +25-08-28 09:04:38 | D | - Skipping Module transformer_blocks.13.norm1_context.linear +25-08-28 09:04:38 | D | - Smoothing Transformer Block transformer_blocks.13 +25-08-28 09:04:38 | D | - transformer_blocks.13.attn.qkv_proj +25-08-28 09:04:38 | D | + w: sint4 +25-08-28 09:04:38 | D | + x: sint4 +25-08-28 09:04:38 | D | + y: None +25-08-28 09:04:38 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:04:38 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 09:04:38 | D | + x - AbsMax +25-08-28 09:04:38 | D | + x = [min=0.1152, max=17.7500] +25-08-28 09:04:38 | D | + w - AbsMax +25-08-28 09:04:38 | D | + w = [min=0.1074, max=0.8320] +25-08-28 09:04:38 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 09:04:40 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 09:05:57 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:05:57 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:05:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:05:57 | D | - sum error = [10012.2932, 9600.8241, 9366.1484, 9175.5336, 8999.1120] +25-08-28 09:05:57 | D | - best error = [10012.2932, 9600.8241, 9366.1484, 9175.5336, 8999.1120] +25-08-28 09:05:57 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:05:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:05:57 | D | - sum error = [ 8826.3424, 8697.0151, 8581.7322, 8536.2708, 8447.6913] +25-08-28 09:05:57 | D | - best error = [ 8826.3424, 8697.0151, 8581.7322, 8536.2708, 8447.6913] +25-08-28 09:05:57 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:05:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:05:57 | D | - sum error = [ 8423.1681, 8461.1035, 8431.9630, 8495.3390, 8583.4361] +25-08-28 09:05:57 | D | - best error = [ 8423.1681, 8423.1681, 8423.1681, 8423.1681, 8423.1681] +25-08-28 09:05:57 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:05:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:05:57 | D | - sum error = [ 8735.7372, 8936.2775, 9073.1132, 9260.3516, 9479.4460] +25-08-28 09:05:57 | D | - best error = [ 8423.1681, 8423.1681, 8423.1681, 8423.1681, 8423.1681] +25-08-28 09:05:57 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:05:57 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:05:57 | D | - sum error = [20944.0604, 19075.3588, 17756.6195, 16312.8730, 14940.4755] +25-08-28 09:05:57 | D | - best error = [ 8423.1681, 8423.1681, 8423.1681, 8423.1681, 8423.1681] +25-08-28 09:05:57 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:05:57 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:05:57 | D | - sum error = [13686.7457, 12690.2779, 11821.8367, 11136.0417, 10606.4154] +25-08-28 09:05:57 | D | - best error = [ 8423.1681, 8423.1681, 8423.1681, 8423.1681, 8423.1681] +25-08-28 09:05:57 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:05:57 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:05:57 | D | - sum error = [10167.2753, 9928.7671, 9613.2484, 9342.3924, 9256.1046] +25-08-28 09:05:57 | D | - best error = [ 8423.1681, 8423.1681, 8423.1681, 8423.1681, 8423.1681] +25-08-28 09:05:57 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:05:57 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:05:57 | D | - sum error = [ 9227.2878, 9226.0709, 9367.2895, 9501.3586] +25-08-28 09:05:57 | D | - best error = [ 8423.1681, 8423.1681, 8423.1681, 8423.1681] +25-08-28 09:05:57 | D | + error = 8423.1681 +25-08-28 09:05:57 | D | + scale = [min=0.3395, max=4.2131] +25-08-28 09:05:57 | D | - transformer_blocks.13.attn add_qkv_proj +25-08-28 09:05:57 | D | + w: sint4 +25-08-28 09:05:57 | D | + x: sint4 +25-08-28 09:05:57 | D | + y: None +25-08-28 09:05:57 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:05:57 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 09:05:57 | D | + x - AbsMax +25-08-28 09:05:57 | D | + x = [min=0.1260, max=33.0000] +25-08-28 09:05:57 | D | + w - AbsMax +25-08-28 09:05:57 | D | + w = [min=0.1235, max=0.5781] +25-08-28 09:05:57 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 09:05:58 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 09:07:05 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:07:05 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:07:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:07:05 | D | - sum error = [ 2073.4870, 1979.4665, 1876.9606, 1770.4686, 1715.1914] +25-08-28 09:07:05 | D | - best error = [ 2073.4870, 1979.4665, 1876.9606, 1770.4686, 1715.1914] +25-08-28 09:07:05 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:07:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:07:05 | D | - sum error = [ 1643.6396, 1603.9391, 1559.0697, 1533.1096, 1477.9531] +25-08-28 09:07:05 | D | - best error = [ 1643.6396, 1603.9391, 1559.0697, 1533.1096, 1477.9531] +25-08-28 09:07:05 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:07:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:07:05 | D | - sum error = [ 1462.6646, 1437.3702, 1432.5609, 1418.7426, 1445.7196] +25-08-28 09:07:05 | D | - best error = [ 1462.6646, 1437.3702, 1432.5609, 1418.7426, 1418.7426] +25-08-28 09:07:05 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:07:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:07:05 | D | - sum error = [ 1461.5253, 1497.7549, 1539.7157, 1594.9709, 1695.9790] +25-08-28 09:07:05 | D | - best error = [ 1418.7426, 1418.7426, 1418.7426, 1418.7426, 1418.7426] +25-08-28 09:07:05 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:07:05 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:07:05 | D | - sum error = [ 2750.2472, 2659.9339, 2521.9323, 2366.1801, 2333.0736] +25-08-28 09:07:05 | D | - best error = [ 1418.7426, 1418.7426, 1418.7426, 1418.7426, 1418.7426] +25-08-28 09:07:05 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:07:05 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:07:05 | D | - sum error = [ 2180.0218, 2103.1412, 1981.6011, 1824.6914, 1726.6277] +25-08-28 09:07:05 | D | - best error = [ 1418.7426, 1418.7426, 1418.7426, 1418.7426, 1418.7426] +25-08-28 09:07:05 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:07:05 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:07:05 | D | - sum error = [ 1674.8357, 1621.1941, 1561.0686, 1563.8469, 1561.4039] +25-08-28 09:07:05 | D | - best error = [ 1418.7426, 1418.7426, 1418.7426, 1418.7426, 1418.7426] +25-08-28 09:07:05 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:07:05 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:07:05 | D | - sum error = [ 1560.2580, 1563.2746, 1624.1469, 1695.6429] +25-08-28 09:07:05 | D | - best error = [ 1418.7426, 1418.7426, 1418.7426, 1418.7426] +25-08-28 09:07:05 | D | + error = 1418.7426 +25-08-28 09:07:05 | D | + scale = [min=0.2601, max=9.7059] +25-08-28 09:07:06 | D | - transformer_blocks.13.attn.out_proj + transformer_blocks.13.attn.add_out_proj +25-08-28 09:07:06 | D | + w: sint4 +25-08-28 09:07:06 | D | + x: sint4 +25-08-28 09:07:06 | D | + y: None +25-08-28 09:07:06 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:07:06 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 09:07:06 | D | + x - AbsMax +25-08-28 09:07:06 | D | + x = [min=3.3906, max=16.1250] +25-08-28 09:07:06 | D | + w - AbsMax +25-08-28 09:07:06 | D | + w = [min=0.1289, max=0.3145] +25-08-28 09:07:06 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 09:07:08 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 09:08:56 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:08:56 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:08:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:08:56 | D | - sum error = [12165.7506, 12115.7405, 12057.1990, 12020.2176, 11999.5640] +25-08-28 09:08:56 | D | - best error = [12165.7506, 12115.7405, 12057.1990, 12020.2176, 11999.5640] +25-08-28 09:08:56 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:08:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:08:56 | D | - sum error = [11956.9882, 11939.6603, 11920.4745, 11932.3553, 11928.5546] +25-08-28 09:08:56 | D | - best error = [11956.9882, 11939.6603, 11920.4745, 11920.4745, 11920.4745] +25-08-28 09:08:56 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:08:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:08:56 | D | - sum error = [11949.2340, 11960.5451, 12000.0887, 12037.0016, 12067.8422] +25-08-28 09:08:56 | D | - best error = [11920.4745, 11920.4745, 11920.4745, 11920.4745, 11920.4745] +25-08-28 09:08:56 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:08:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:08:56 | D | - sum error = [12111.8045, 12180.7954, 12253.1962, 12334.4247, 12424.1146] +25-08-28 09:08:56 | D | - best error = [11920.4745, 11920.4745, 11920.4745, 11920.4745, 11920.4745] +25-08-28 09:08:56 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:08:56 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:08:56 | D | - sum error = [13407.4594, 13251.8624, 13082.7734, 12920.1957, 12787.3275] +25-08-28 09:08:56 | D | - best error = [11920.4745, 11920.4745, 11920.4745, 11920.4745, 11920.4745] +25-08-28 09:08:56 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:08:56 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:08:56 | D | - sum error = [12657.9189, 12539.8781, 12456.7797, 12387.0106, 12316.6931] +25-08-28 09:08:56 | D | - best error = [11920.4745, 11920.4745, 11920.4745, 11920.4745, 11920.4745] +25-08-28 09:08:56 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:08:56 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:08:56 | D | - sum error = [12274.3590, 12247.8530, 12240.5274, 12253.6721, 12233.7618] +25-08-28 09:08:56 | D | - best error = [11920.4745, 11920.4745, 11920.4745, 11920.4745, 11920.4745] +25-08-28 09:08:56 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:08:56 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:08:56 | D | - sum error = [12265.0958, 12304.0117, 12361.1954, 12415.8698] +25-08-28 09:08:56 | D | - best error = [11920.4745, 11920.4745, 11920.4745, 11920.4745] +25-08-28 09:08:56 | D | + error = 11920.4745 +25-08-28 09:08:56 | D | + scale = [min=1.5332, max=2.6462] +25-08-28 09:08:57 | D | - transformer_blocks.13.ff.up_proj +25-08-28 09:08:57 | D | + w: sint4 +25-08-28 09:08:57 | D | + x: sint4 +25-08-28 09:08:57 | D | + y: None +25-08-28 09:08:57 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:08:57 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 09:08:57 | D | + x - AbsMax +25-08-28 09:08:57 | D | + x = [min=0.0957, max=7.8125] +25-08-28 09:08:57 | D | + w - AbsMax +25-08-28 09:08:57 | D | + w = [min=0.1167, max=0.5820] +25-08-28 09:08:57 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 09:08:59 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 09:10:16 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:10:16 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:10:16 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:10:16 | D | - sum error = [10864.8391, 10658.4325, 10477.4695, 10319.1984, 10186.3159] +25-08-28 09:10:16 | D | - best error = [10864.8391, 10658.4325, 10477.4695, 10319.1984, 10186.3159] +25-08-28 09:10:16 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:10:16 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:10:16 | D | - sum error = [10077.7529, 9977.9072, 9901.7267, 9856.1055, 9818.9478] +25-08-28 09:10:16 | D | - best error = [10077.7529, 9977.9072, 9901.7267, 9856.1055, 9818.9478] +25-08-28 09:10:16 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:10:16 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:10:16 | D | - sum error = [ 9805.1113, 9813.6187, 9825.1856, 9873.9593, 9943.3765] +25-08-28 09:10:16 | D | - best error = [ 9805.1113, 9805.1113, 9805.1113, 9805.1113, 9805.1113] +25-08-28 09:10:16 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:10:16 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:10:16 | D | - sum error = [10019.4998, 10118.4683, 10238.5413, 10376.3462, 10539.9472] +25-08-28 09:10:16 | D | - best error = [ 9805.1113, 9805.1113, 9805.1113, 9805.1113, 9805.1113] +25-08-28 09:10:16 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:10:16 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:10:16 | D | - sum error = [12282.2050, 11881.1602, 11523.9920, 11202.7229, 10921.3354] +25-08-28 09:10:16 | D | - best error = [ 9805.1113, 9805.1113, 9805.1113, 9805.1113, 9805.1113] +25-08-28 09:10:16 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:10:16 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:10:16 | D | - sum error = [10684.8626, 10481.1118, 10315.7216, 10181.8104, 10085.2296] +25-08-28 09:10:16 | D | - best error = [ 9805.1113, 9805.1113, 9805.1113, 9805.1113, 9805.1113] +25-08-28 09:10:16 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:10:16 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:10:16 | D | - sum error = [10012.2145, 9966.4670, 9956.6709, 9986.4444, 10032.9764] +25-08-28 09:10:16 | D | - best error = [ 9805.1113, 9805.1113, 9805.1113, 9805.1113, 9805.1113] +25-08-28 09:10:16 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:10:16 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:10:16 | D | - sum error = [10108.7645, 10212.5687, 10349.4274, 10518.0858] +25-08-28 09:10:16 | D | - best error = [ 9805.1113, 9805.1113, 9805.1113, 9805.1113] +25-08-28 09:10:16 | D | + error = 9805.1113 +25-08-28 09:10:16 | D | + scale = [min=0.3094, max=2.7951] +25-08-28 09:10:16 | D | - transformer_blocks.13.ff.down_proj +25-08-28 09:10:16 | D | + w: sint4 +25-08-28 09:10:16 | D | + x: uint4 +25-08-28 09:10:16 | D | + y: None +25-08-28 09:10:16 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:10:16 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 09:10:19 | D | + x - AbsMax +25-08-28 09:10:19 | D | + x = [min=0.1719, max=18.2500] +25-08-28 09:10:19 | D | + w - AbsMax +25-08-28 09:10:19 | D | + w = [min=0.0757, max=1.0156] +25-08-28 09:10:19 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 09:10:21 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 09:12:15 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:12:15 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:12:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:12:15 | D | - sum error = [ 9215.9991, 9222.2986, 9235.6362, 9280.6651, 9347.7244] +25-08-28 09:12:15 | D | - best error = [ 9215.9991, 9215.9991, 9215.9991, 9215.9991, 9215.9991] +25-08-28 09:12:15 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:12:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:12:15 | D | - sum error = [ 9409.0845, 9520.8424, 9640.9361, 9751.4393, 9880.9323] +25-08-28 09:12:15 | D | - best error = [ 9215.9991, 9215.9991, 9215.9991, 9215.9991, 9215.9991] +25-08-28 09:12:15 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:12:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:12:15 | D | - sum error = [10070.8578, 10260.3610, 10433.2354, 10635.1527, 10863.9978] +25-08-28 09:12:15 | D | - best error = [ 9215.9991, 9215.9991, 9215.9991, 9215.9991, 9215.9991] +25-08-28 09:12:15 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:12:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:12:15 | D | - sum error = [11239.8975, 11661.6664, 12175.9133, 12896.3815, 13624.1192] +25-08-28 09:12:15 | D | - best error = [ 9215.9991, 9215.9991, 9215.9991, 9215.9991, 9215.9991] +25-08-28 09:12:15 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:12:15 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:12:15 | D | - sum error = [10950.4444, 10668.1309, 10450.3275, 10229.9529, 10031.1016] +25-08-28 09:12:15 | D | - best error = [ 9215.9991, 9215.9991, 9215.9991, 9215.9991, 9215.9991] +25-08-28 09:12:15 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:12:15 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:12:15 | D | - sum error = [ 9873.5943, 9774.6996, 9707.7717, 9697.5717, 9778.6658] +25-08-28 09:12:15 | D | - best error = [ 9215.9991, 9215.9991, 9215.9991, 9215.9991, 9215.9991] +25-08-28 09:12:15 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:12:15 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:12:15 | D | - sum error = [ 9850.3819, 9997.0484, 10113.3619, 10420.6606, 10765.6885] +25-08-28 09:12:15 | D | - best error = [ 9215.9991, 9215.9991, 9215.9991, 9215.9991, 9215.9991] +25-08-28 09:12:15 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:12:15 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:12:15 | D | - sum error = [11163.4317, 11792.8209, 12602.2049, 13449.6917] +25-08-28 09:12:15 | D | - best error = [ 9215.9991, 9215.9991, 9215.9991, 9215.9991] +25-08-28 09:12:15 | D | + error = 9215.9991 +25-08-28 09:12:15 | D | + scale = [min=1.0000, max=1.0000] +25-08-28 09:12:15 | D | - transformer_blocks.13.ff_context.up_proj +25-08-28 09:12:15 | D | + w: sint4 +25-08-28 09:12:15 | D | + x: sint4 +25-08-28 09:12:15 | D | + y: None +25-08-28 09:12:15 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:12:15 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 09:12:15 | D | + x - AbsMax +25-08-28 09:12:15 | D | + x = [min=0.1426, max=152.0000] +25-08-28 09:12:15 | D | + w - AbsMax +25-08-28 09:12:15 | D | + w = [min=0.0977, max=0.3672] +25-08-28 09:12:15 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 09:12:16 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 09:12:59 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:12:59 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:12:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:12:59 | D | - sum error = [10740.3978, 8941.4598, 7560.0233, 6513.7461, 5590.6780] +25-08-28 09:12:59 | D | - best error = [10740.3978, 8941.4598, 7560.0233, 6513.7461, 5590.6780] +25-08-28 09:12:59 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:12:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:12:59 | D | - sum error = [ 4984.7959, 4505.4112, 4106.6193, 3759.7141, 3499.5397] +25-08-28 09:12:59 | D | - best error = [ 4984.7959, 4505.4112, 4106.6193, 3759.7141, 3499.5397] +25-08-28 09:12:59 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:12:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:12:59 | D | - sum error = [ 3311.8788, 3171.1540, 3012.3532, 2928.5836, 2871.8903] +25-08-28 09:12:59 | D | - best error = [ 3311.8788, 3171.1540, 3012.3532, 2928.5836, 2871.8903] +25-08-28 09:12:59 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:12:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:12:59 | D | - sum error = [ 2814.2397, 2803.7808, 2823.5747, 2891.1601, 3016.5543] +25-08-28 09:12:59 | D | - best error = [ 2814.2397, 2803.7808, 2803.7808, 2803.7808, 2803.7808] +25-08-28 09:12:59 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:12:59 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:12:59 | D | - sum error = [ 8401.3023, 7093.1725, 6125.5563, 5415.7011, 4841.5553] +25-08-28 09:12:59 | D | - best error = [ 2803.7808, 2803.7808, 2803.7808, 2803.7808, 2803.7808] +25-08-28 09:12:59 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:12:59 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:12:59 | D | - sum error = [ 4401.8626, 4077.8660, 3766.2299, 3513.4103, 3344.9463] +25-08-28 09:12:59 | D | - best error = [ 2803.7808, 2803.7808, 2803.7808, 2803.7808, 2803.7808] +25-08-28 09:12:59 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:12:59 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:12:59 | D | - sum error = [ 3151.5261, 3007.2512, 2894.6394, 2813.2504, 2776.2693] +25-08-28 09:12:59 | D | - best error = [ 2803.7808, 2803.7808, 2803.7808, 2803.7808, 2776.2693] +25-08-28 09:12:59 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:12:59 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:12:59 | D | - sum error = [ 2783.9540, 2786.5988, 2852.5885, 2998.3921] +25-08-28 09:12:59 | D | - best error = [ 2776.2693, 2776.2693, 2776.2693, 2776.2693] +25-08-28 09:12:59 | D | + error = 2776.2693 +25-08-28 09:12:59 | D | + scale = [min=0.3775, max=70.0908] +25-08-28 09:12:59 | D | - transformer_blocks.13.ff_context.down_proj +25-08-28 09:12:59 | D | + w: sint4 +25-08-28 09:12:59 | D | + x: uint4 +25-08-28 09:12:59 | D | + y: None +25-08-28 09:12:59 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:12:59 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 09:13:00 | D | + x - AbsMax +25-08-28 09:13:00 | D | + x = [min=0.1719, max=20.6250] +25-08-28 09:13:00 | D | + w - AbsMax +25-08-28 09:13:00 | D | + w = [min=0.0208, max=0.4688] +25-08-28 09:13:00 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 09:13:00 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 09:13:50 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:13:50 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:13:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:13:50 | D | - sum error = [ 1486.9963, 1415.9906, 1367.3371, 1409.9747, 1488.8448] +25-08-28 09:13:50 | D | - best error = [ 1486.9963, 1415.9906, 1367.3371, 1367.3371, 1367.3371] +25-08-28 09:13:50 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:13:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:13:50 | D | - sum error = [ 1662.7251, 1921.8362, 2476.4289, 3334.4085, 5078.0738] +25-08-28 09:13:50 | D | - best error = [ 1367.3371, 1367.3371, 1367.3371, 1367.3371, 1367.3371] +25-08-28 09:13:50 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:13:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:13:50 | D | - sum error = [ 7514.7428, 10924.6876, 15249.4779, 20394.3953, 26295.4414] +25-08-28 09:13:50 | D | - best error = [ 1367.3371, 1367.3371, 1367.3371, 1367.3371, 1367.3371] +25-08-28 09:13:50 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:13:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:13:50 | D | - sum error = [32250.6940, 38230.3419, 43634.1180, 48109.9062, 52039.4287] +25-08-28 09:13:50 | D | - best error = [ 1367.3371, 1367.3371, 1367.3371, 1367.3371, 1367.3371] +25-08-28 09:13:50 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:13:50 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:13:50 | D | - sum error = [ 1747.7042, 1572.0569, 1438.8556, 1353.3984, 1353.9921] +25-08-28 09:13:50 | D | - best error = [ 1367.3371, 1367.3371, 1367.3371, 1353.3984, 1353.3984] +25-08-28 09:13:50 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:13:50 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:13:50 | D | - sum error = [ 1436.6520, 1587.5237, 1821.4932, 2252.4785, 2937.1357] +25-08-28 09:13:50 | D | - best error = [ 1353.3984, 1353.3984, 1353.3984, 1353.3984, 1353.3984] +25-08-28 09:13:50 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:13:50 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:13:50 | D | - sum error = [ 4424.2622, 7414.9595, 12240.5699, 18700.5360, 26079.4601] +25-08-28 09:13:50 | D | - best error = [ 1353.3984, 1353.3984, 1353.3984, 1353.3984, 1353.3984] +25-08-28 09:13:50 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:13:50 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:13:50 | D | - sum error = [33649.3942, 40654.0023, 46532.6655, 51412.7903] +25-08-28 09:13:50 | D | - best error = [ 1353.3984, 1353.3984, 1353.3984, 1353.3984] +25-08-28 09:13:50 | D | + error = 1353.3984 +25-08-28 09:13:50 | D | + scale = [min=1.4344, max=18.7952] +25-08-28 09:14:09 | D | - Smoothing Diffusion Block transformer_blocks.14 +25-08-28 09:14:09 | D | - Skipping Module transformer_blocks.14.norm1.linear +25-08-28 09:14:09 | D | - Skipping Module transformer_blocks.14.norm1_context.linear +25-08-28 09:14:09 | D | - Smoothing Transformer Block transformer_blocks.14 +25-08-28 09:14:09 | D | - transformer_blocks.14.attn.qkv_proj +25-08-28 09:14:09 | D | + w: sint4 +25-08-28 09:14:09 | D | + x: sint4 +25-08-28 09:14:09 | D | + y: None +25-08-28 09:14:09 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:14:09 | D | + finished parsing calibration arguments, ram usage: 16.9 +25-08-28 09:14:10 | D | + x - AbsMax +25-08-28 09:14:10 | D | + x = [min=0.1182, max=25.1250] +25-08-28 09:14:10 | D | + w - AbsMax +25-08-28 09:14:10 | D | + w = [min=0.1147, max=0.7305] +25-08-28 09:14:10 | D | + finished resetting calibrator, ram usage: 17.0 +25-08-28 09:14:11 | D | + finished calculating the original outputs, ram usage: 17.2 +25-08-28 09:15:23 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:15:23 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:15:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:15:23 | D | - sum error = [10531.6475, 10176.2445, 9576.1767, 9168.6640, 8803.5411] +25-08-28 09:15:23 | D | - best error = [10531.6475, 10176.2445, 9576.1767, 9168.6640, 8803.5411] +25-08-28 09:15:23 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:15:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:15:23 | D | - sum error = [ 8467.4190, 8246.3054, 7983.0796, 7909.8503, 7924.3427] +25-08-28 09:15:23 | D | - best error = [ 8467.4190, 8246.3054, 7983.0796, 7909.8503, 7909.8503] +25-08-28 09:15:23 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:15:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:15:23 | D | - sum error = [ 7735.6771, 7727.4141, 7859.0170, 7908.8029, 7898.4649] +25-08-28 09:15:23 | D | - best error = [ 7735.6771, 7727.4141, 7727.4141, 7727.4141, 7727.4141] +25-08-28 09:15:23 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:15:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:15:23 | D | - sum error = [ 8021.0294, 8162.2825, 8197.3767, 8444.9530, 8573.1702] +25-08-28 09:15:23 | D | - best error = [ 7727.4141, 7727.4141, 7727.4141, 7727.4141, 7727.4141] +25-08-28 09:15:23 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:15:23 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:15:23 | D | - sum error = [25759.1702, 22959.7449, 20512.6045, 18467.7119, 16371.1532] +25-08-28 09:15:23 | D | - best error = [ 7727.4141, 7727.4141, 7727.4141, 7727.4141, 7727.4141] +25-08-28 09:15:23 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:15:23 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:15:23 | D | - sum error = [14878.1225, 13564.6930, 12558.8547, 11390.6832, 10575.1932] +25-08-28 09:15:23 | D | - best error = [ 7727.4141, 7727.4141, 7727.4141, 7727.4141, 7727.4141] +25-08-28 09:15:23 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:15:23 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:15:23 | D | - sum error = [ 9917.1661, 9444.8479, 9158.9082, 8774.6503, 8743.9872] +25-08-28 09:15:23 | D | - best error = [ 7727.4141, 7727.4141, 7727.4141, 7727.4141, 7727.4141] +25-08-28 09:15:23 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:15:23 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:15:23 | D | - sum error = [ 8591.3994, 8490.6023, 8578.4241, 8771.3128] +25-08-28 09:15:23 | D | - best error = [ 7727.4141, 7727.4141, 7727.4141, 7727.4141] +25-08-28 09:15:23 | D | + error = 7727.4141 +25-08-28 09:15:23 | D | + scale = [min=0.3089, max=5.8892] +25-08-28 09:15:24 | D | - transformer_blocks.14.attn add_qkv_proj +25-08-28 09:15:24 | D | + w: sint4 +25-08-28 09:15:24 | D | + x: sint4 +25-08-28 09:15:24 | D | + y: None +25-08-28 09:15:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:15:24 | D | + finished parsing calibration arguments, ram usage: 18.6 +25-08-28 09:15:24 | D | + x - AbsMax +25-08-28 09:15:24 | D | + x = [min=0.1504, max=18.1250] +25-08-28 09:15:24 | D | + w - AbsMax +25-08-28 09:15:24 | D | + w = [min=0.1152, max=0.4961] +25-08-28 09:15:24 | D | + finished resetting calibrator, ram usage: 18.6 +25-08-28 09:15:25 | D | + finished calculating the original outputs, ram usage: 18.6 +25-08-28 09:16:28 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:16:28 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:16:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:16:28 | D | - sum error = [ 2323.1357, 2200.5644, 2117.4950, 2061.8853, 1989.4953] +25-08-28 09:16:28 | D | - best error = [ 2323.1357, 2200.5644, 2117.4950, 2061.8853, 1989.4953] +25-08-28 09:16:28 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:16:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:16:28 | D | - sum error = [ 1923.1653, 1867.9060, 1825.1714, 1781.9002, 1765.6312] +25-08-28 09:16:28 | D | - best error = [ 1923.1653, 1867.9060, 1825.1714, 1781.9002, 1765.6312] +25-08-28 09:16:28 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:16:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:16:28 | D | - sum error = [ 1716.9194, 1691.7487, 1692.8489, 1695.7239, 1722.3443] +25-08-28 09:16:28 | D | - best error = [ 1716.9194, 1691.7487, 1691.7487, 1691.7487, 1691.7487] +25-08-28 09:16:28 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:16:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:16:28 | D | - sum error = [ 1736.3388, 1792.5619, 1865.6628, 1953.3693, 2037.6072] +25-08-28 09:16:28 | D | - best error = [ 1691.7487, 1691.7487, 1691.7487, 1691.7487, 1691.7487] +25-08-28 09:16:28 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:16:28 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:16:28 | D | - sum error = [ 3152.9767, 2966.9437, 2833.8918, 2733.0709, 2594.3972] +25-08-28 09:16:28 | D | - best error = [ 1691.7487, 1691.7487, 1691.7487, 1691.7487, 1691.7487] +25-08-28 09:16:28 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:16:28 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:16:28 | D | - sum error = [ 2439.4434, 2323.4006, 2232.1745, 2149.2930, 2049.6705] +25-08-28 09:16:28 | D | - best error = [ 1691.7487, 1691.7487, 1691.7487, 1691.7487, 1691.7487] +25-08-28 09:16:28 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:16:28 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:16:28 | D | - sum error = [ 1947.3019, 1913.2402, 1863.8653, 1826.3499, 1815.9959] +25-08-28 09:16:28 | D | - best error = [ 1691.7487, 1691.7487, 1691.7487, 1691.7487, 1691.7487] +25-08-28 09:16:28 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:16:28 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:16:28 | D | - sum error = [ 1836.8238, 1888.1520, 1963.8061, 2029.1971] +25-08-28 09:16:28 | D | - best error = [ 1691.7487, 1691.7487, 1691.7487, 1691.7487] +25-08-28 09:16:28 | D | + error = 1691.7487 +25-08-28 09:16:28 | D | + scale = [min=0.3528, max=4.9210] +25-08-28 09:16:28 | D | - transformer_blocks.14.attn.out_proj + transformer_blocks.14.attn.add_out_proj +25-08-28 09:16:28 | D | + w: sint4 +25-08-28 09:16:28 | D | + x: sint4 +25-08-28 09:16:28 | D | + y: None +25-08-28 09:16:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:16:28 | D | + finished parsing calibration arguments, ram usage: 18.6 +25-08-28 09:16:28 | D | + x - AbsMax +25-08-28 09:16:28 | D | + x = [min=3.1875, max=31.3750] +25-08-28 09:16:28 | D | + w - AbsMax +25-08-28 09:16:28 | D | + w = [min=0.1182, max=0.4102] +25-08-28 09:16:28 | D | + finished resetting calibrator, ram usage: 18.6 +25-08-28 09:16:29 | D | + finished calculating the original outputs, ram usage: 18.6 +25-08-28 09:18:16 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:18:16 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:18:16 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:18:16 | D | - sum error = [ 9950.5863, 9927.3928, 9901.7320, 9877.7807, 9861.3829] +25-08-28 09:18:16 | D | - best error = [ 9950.5863, 9927.3928, 9901.7320, 9877.7807, 9861.3829] +25-08-28 09:18:16 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:18:16 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:18:16 | D | - sum error = [ 9880.4312, 9888.3107, 9929.1776, 9959.8892, 9982.6217] +25-08-28 09:18:16 | D | - best error = [ 9861.3829, 9861.3829, 9861.3829, 9861.3829, 9861.3829] +25-08-28 09:18:16 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:18:16 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:18:16 | D | - sum error = [10033.2463, 10098.2010, 10172.6520, 10223.4002, 10304.9774] +25-08-28 09:18:16 | D | - best error = [ 9861.3829, 9861.3829, 9861.3829, 9861.3829, 9861.3829] +25-08-28 09:18:16 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:18:16 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:18:16 | D | - sum error = [10380.9680, 10462.3454, 10540.6535, 10661.0375, 10733.8463] +25-08-28 09:18:16 | D | - best error = [ 9861.3829, 9861.3829, 9861.3829, 9861.3829, 9861.3829] +25-08-28 09:18:16 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:18:16 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:18:16 | D | - sum error = [10549.6123, 10473.2811, 10395.7414, 10347.3633, 10279.0381] +25-08-28 09:18:16 | D | - best error = [ 9861.3829, 9861.3829, 9861.3829, 9861.3829, 9861.3829] +25-08-28 09:18:16 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:18:16 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:18:16 | D | - sum error = [10232.6583, 10212.8402, 10188.5834, 10208.9333, 10208.2618] +25-08-28 09:18:16 | D | - best error = [ 9861.3829, 9861.3829, 9861.3829, 9861.3829, 9861.3829] +25-08-28 09:18:16 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:18:16 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:18:16 | D | - sum error = [10208.2919, 10231.4413, 10260.8873, 10318.6817, 10382.9599] +25-08-28 09:18:16 | D | - best error = [ 9861.3829, 9861.3829, 9861.3829, 9861.3829, 9861.3829] +25-08-28 09:18:16 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:18:16 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:18:16 | D | - sum error = [10481.2927, 10566.6059, 10656.3727, 10742.6485] +25-08-28 09:18:16 | D | - best error = [ 9861.3829, 9861.3829, 9861.3829, 9861.3829] +25-08-28 09:18:16 | D | + error = 9861.3829 +25-08-28 09:18:16 | D | + scale = [min=1.2609, max=1.9921] +25-08-28 09:18:16 | D | - transformer_blocks.14.ff.up_proj +25-08-28 09:18:16 | D | + w: sint4 +25-08-28 09:18:16 | D | + x: sint4 +25-08-28 09:18:16 | D | + y: None +25-08-28 09:18:16 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:18:16 | D | + finished parsing calibration arguments, ram usage: 18.6 +25-08-28 09:18:17 | D | + x - AbsMax +25-08-28 09:18:17 | D | + x = [min=0.1455, max=6.1875] +25-08-28 09:18:17 | D | + w - AbsMax +25-08-28 09:18:17 | D | + w = [min=0.1113, max=0.5312] +25-08-28 09:18:17 | D | + finished resetting calibrator, ram usage: 18.6 +25-08-28 09:18:18 | D | + finished calculating the original outputs, ram usage: 18.6 +25-08-28 09:19:35 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:19:35 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:19:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:19:35 | D | - sum error = [10561.5412, 10368.8109, 10184.6964, 10031.7352, 9894.4041] +25-08-28 09:19:35 | D | - best error = [10561.5412, 10368.8109, 10184.6964, 10031.7352, 9894.4041] +25-08-28 09:19:35 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:19:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:19:35 | D | - sum error = [ 9779.5239, 9682.4571, 9602.1639, 9538.8897, 9493.3796] +25-08-28 09:19:35 | D | - best error = [ 9779.5239, 9682.4571, 9602.1639, 9538.8897, 9493.3796] +25-08-28 09:19:35 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:19:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:19:35 | D | - sum error = [ 9465.5679, 9467.9045, 9466.9345, 9495.3159, 9526.5108] +25-08-28 09:19:35 | D | - best error = [ 9465.5679, 9465.5679, 9465.5679, 9465.5679, 9465.5679] +25-08-28 09:19:35 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:19:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:19:35 | D | - sum error = [ 9588.7630, 9661.4367, 9748.8100, 9862.6864, 9997.2344] +25-08-28 09:19:35 | D | - best error = [ 9465.5679, 9465.5679, 9465.5679, 9465.5679, 9465.5679] +25-08-28 09:19:35 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:19:35 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:19:35 | D | - sum error = [11134.7636, 10854.7649, 10593.3243, 10373.5865, 10173.8976] +25-08-28 09:19:35 | D | - best error = [ 9465.5679, 9465.5679, 9465.5679, 9465.5679, 9465.5679] +25-08-28 09:19:35 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:19:35 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:19:35 | D | - sum error = [10009.3308, 9864.8082, 9739.9474, 9649.4601, 9581.9501] +25-08-28 09:19:35 | D | - best error = [ 9465.5679, 9465.5679, 9465.5679, 9465.5679, 9465.5679] +25-08-28 09:19:35 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:19:35 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:19:35 | D | - sum error = [ 9539.6470, 9511.5433, 9512.1922, 9534.9739, 9571.9093] +25-08-28 09:19:35 | D | - best error = [ 9465.5679, 9465.5679, 9465.5679, 9465.5679, 9465.5679] +25-08-28 09:19:35 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:19:35 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:19:35 | D | - sum error = [ 9634.1389, 9727.1028, 9842.6078, 9981.9369] +25-08-28 09:19:35 | D | - best error = [ 9465.5679, 9465.5679, 9465.5679, 9465.5679] +25-08-28 09:19:35 | D | + error = 9465.5679 +25-08-28 09:19:35 | D | + scale = [min=0.3815, max=2.4875] +25-08-28 09:19:36 | D | - transformer_blocks.14.ff.down_proj +25-08-28 09:19:36 | D | + w: sint4 +25-08-28 09:19:36 | D | + x: uint4 +25-08-28 09:19:36 | D | + y: None +25-08-28 09:19:36 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:19:36 | D | + finished parsing calibration arguments, ram usage: 18.7 +25-08-28 09:19:38 | D | + x - AbsMax +25-08-28 09:19:38 | D | + x = [min=0.1719, max=15.6250] +25-08-28 09:19:38 | D | + w - AbsMax +25-08-28 09:19:38 | D | + w = [min=0.0806, max=0.7305] +25-08-28 09:19:38 | D | + finished resetting calibrator, ram usage: 18.7 +25-08-28 09:19:40 | D | + finished calculating the original outputs, ram usage: 18.7 +25-08-28 09:21:30 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:21:30 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:21:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:21:30 | D | - sum error = [ 9126.2711, 9121.8016, 9169.5823, 9151.0804, 9184.2818] +25-08-28 09:21:30 | D | - best error = [ 9126.2711, 9121.8016, 9121.8016, 9121.8016, 9121.8016] +25-08-28 09:21:30 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:21:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:21:30 | D | - sum error = [ 9226.4901, 9306.1923, 9422.5364, 9508.6260, 9596.3276] +25-08-28 09:21:30 | D | - best error = [ 9121.8016, 9121.8016, 9121.8016, 9121.8016, 9121.8016] +25-08-28 09:21:30 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:21:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:21:30 | D | - sum error = [ 9748.9606, 9936.5125, 10076.8162, 10282.7253, 10535.9096] +25-08-28 09:21:30 | D | - best error = [ 9121.8016, 9121.8016, 9121.8016, 9121.8016, 9121.8016] +25-08-28 09:21:30 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:21:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:21:30 | D | - sum error = [10806.1105, 11137.3031, 11578.3840, 12124.2441, 12661.5133] +25-08-28 09:21:30 | D | - best error = [ 9121.8016, 9121.8016, 9121.8016, 9121.8016, 9121.8016] +25-08-28 09:21:30 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:21:30 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:21:30 | D | - sum error = [10170.9095, 9951.5329, 9792.2929, 9670.9702, 9532.5472] +25-08-28 09:21:30 | D | - best error = [ 9121.8016, 9121.8016, 9121.8016, 9121.8016, 9121.8016] +25-08-28 09:21:30 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:21:30 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:21:30 | D | - sum error = [ 9442.9809, 9404.7116, 9363.7851, 9337.2865, 9425.1073] +25-08-28 09:21:30 | D | - best error = [ 9121.8016, 9121.8016, 9121.8016, 9121.8016, 9121.8016] +25-08-28 09:21:30 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:21:30 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:21:30 | D | - sum error = [ 9551.3312, 9626.4627, 9823.7097, 10016.7795, 10309.0426] +25-08-28 09:21:30 | D | - best error = [ 9121.8016, 9121.8016, 9121.8016, 9121.8016, 9121.8016] +25-08-28 09:21:30 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:21:30 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:21:30 | D | - sum error = [10686.0028, 11195.2133, 11824.7422, 12540.9811] +25-08-28 09:21:30 | D | - best error = [ 9121.8016, 9121.8016, 9121.8016, 9121.8016] +25-08-28 09:21:30 | D | + error = 9121.8016 +25-08-28 09:21:30 | D | + scale = [min=0.9157, max=1.1473] +25-08-28 09:21:30 | D | - transformer_blocks.14.ff_context.up_proj +25-08-28 09:21:30 | D | + w: sint4 +25-08-28 09:21:30 | D | + x: sint4 +25-08-28 09:21:30 | D | + y: None +25-08-28 09:21:30 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:21:30 | D | + finished parsing calibration arguments, ram usage: 18.7 +25-08-28 09:21:30 | D | + x - AbsMax +25-08-28 09:21:30 | D | + x = [min=0.1484, max=53.5000] +25-08-28 09:21:30 | D | + w - AbsMax +25-08-28 09:21:30 | D | + w = [min=0.1099, max=0.4473] +25-08-28 09:21:30 | D | + finished resetting calibrator, ram usage: 18.7 +25-08-28 09:21:31 | D | + finished calculating the original outputs, ram usage: 18.7 +25-08-28 09:22:14 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:22:14 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:22:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:22:14 | D | - sum error = [ 5950.5096, 5474.4965, 5063.7058, 4679.0589, 4342.6775] +25-08-28 09:22:14 | D | - best error = [ 5950.5096, 5474.4965, 5063.7058, 4679.0589, 4342.6775] +25-08-28 09:22:14 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:22:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:22:14 | D | - sum error = [ 4036.9482, 3758.0069, 3529.3814, 3299.5527, 3128.4191] +25-08-28 09:22:14 | D | - best error = [ 4036.9482, 3758.0069, 3529.3814, 3299.5527, 3128.4191] +25-08-28 09:22:14 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:22:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:22:14 | D | - sum error = [ 2976.2634, 2873.1428, 2788.3547, 2739.9924, 2723.6688] +25-08-28 09:22:14 | D | - best error = [ 2976.2634, 2873.1428, 2788.3547, 2739.9924, 2723.6688] +25-08-28 09:22:14 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:22:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:22:14 | D | - sum error = [ 2763.7484, 2823.2282, 2933.5146, 3088.4606, 3252.5990] +25-08-28 09:22:14 | D | - best error = [ 2723.6688, 2723.6688, 2723.6688, 2723.6688, 2723.6688] +25-08-28 09:22:14 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:22:14 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:22:14 | D | - sum error = [ 6518.3854, 5942.0993, 5410.7820, 4944.6513, 4554.8142] +25-08-28 09:22:14 | D | - best error = [ 2723.6688, 2723.6688, 2723.6688, 2723.6688, 2723.6688] +25-08-28 09:22:14 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:22:14 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:22:14 | D | - sum error = [ 4190.9199, 3887.8097, 3603.1749, 3371.1950, 3156.5704] +25-08-28 09:22:14 | D | - best error = [ 2723.6688, 2723.6688, 2723.6688, 2723.6688, 2723.6688] +25-08-28 09:22:14 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:22:14 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:22:14 | D | - sum error = [ 3002.7847, 2875.0705, 2792.0780, 2754.8010, 2751.3466] +25-08-28 09:22:14 | D | - best error = [ 2723.6688, 2723.6688, 2723.6688, 2723.6688, 2723.6688] +25-08-28 09:22:14 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:22:14 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:22:14 | D | - sum error = [ 2809.3419, 2909.6852, 3056.3437, 3232.7860] +25-08-28 09:22:14 | D | - best error = [ 2723.6688, 2723.6688, 2723.6688, 2723.6688] +25-08-28 09:22:14 | D | + error = 2723.6688 +25-08-28 09:22:14 | D | + scale = [min=0.2631, max=16.2124] +25-08-28 09:22:14 | D | - transformer_blocks.14.ff_context.down_proj +25-08-28 09:22:14 | D | + w: sint4 +25-08-28 09:22:14 | D | + x: uint4 +25-08-28 09:22:14 | D | + y: None +25-08-28 09:22:14 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:22:14 | D | + finished parsing calibration arguments, ram usage: 18.7 +25-08-28 09:22:14 | D | + x - AbsMax +25-08-28 09:22:14 | D | + x = [min=0.1719, max=37.2500] +25-08-28 09:22:14 | D | + w - AbsMax +25-08-28 09:22:14 | D | + w = [min=0.0298, max=0.7344] +25-08-28 09:22:14 | D | + finished resetting calibrator, ram usage: 18.7 +25-08-28 09:22:15 | D | + finished calculating the original outputs, ram usage: 18.7 +25-08-28 09:23:05 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:23:05 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:23:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:23:05 | D | - sum error = [ 4084.6726, 3685.0336, 3251.8753, 3040.9712, 2827.7041] +25-08-28 09:23:05 | D | - best error = [ 4084.6726, 3685.0336, 3251.8753, 3040.9712, 2827.7041] +25-08-28 09:23:05 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:23:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:23:05 | D | - sum error = [ 2805.0109, 2662.8490, 2730.5643, 3014.2211, 3635.7610] +25-08-28 09:23:05 | D | - best error = [ 2805.0109, 2662.8490, 2662.8490, 2662.8490, 2662.8490] +25-08-28 09:23:05 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:23:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:23:05 | D | - sum error = [ 4630.2572, 6473.8230, 9319.0085, 13141.1744, 17917.9686] +25-08-28 09:23:05 | D | - best error = [ 2662.8490, 2662.8490, 2662.8490, 2662.8490, 2662.8490] +25-08-28 09:23:05 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:23:05 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:23:05 | D | - sum error = [23881.1229, 30833.3119, 38593.2338, 47026.9451, 55398.7933] +25-08-28 09:23:05 | D | - best error = [ 2662.8490, 2662.8490, 2662.8490, 2662.8490, 2662.8490] +25-08-28 09:23:05 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:23:05 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:23:05 | D | - sum error = [ 4408.9112, 3901.1084, 3540.7184, 3216.3417, 3014.6754] +25-08-28 09:23:05 | D | - best error = [ 2662.8490, 2662.8490, 2662.8490, 2662.8490, 2662.8490] +25-08-28 09:23:05 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:23:05 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:23:05 | D | - sum error = [ 2978.5688, 2971.0567, 3104.8475, 3358.2146, 3993.0691] +25-08-28 09:23:05 | D | - best error = [ 2662.8490, 2662.8490, 2662.8490, 2662.8490, 2662.8490] +25-08-28 09:23:05 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:23:05 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:23:05 | D | - sum error = [ 4906.5666, 6751.9159, 10253.1697, 15827.4319, 22920.7490] +25-08-28 09:23:05 | D | - best error = [ 2662.8490, 2662.8490, 2662.8490, 2662.8490, 2662.8490] +25-08-28 09:23:05 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:23:05 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:23:05 | D | - sum error = [31158.1648, 39901.7417, 48585.6618, 56408.1447] +25-08-28 09:23:05 | D | - best error = [ 2662.8490, 2662.8490, 2662.8490, 2662.8490] +25-08-28 09:23:05 | D | + error = 2662.8490 +25-08-28 09:23:05 | D | + scale = [min=0.5896, max=2.9603] +25-08-28 09:23:25 | D | - Smoothing Diffusion Block transformer_blocks.15 +25-08-28 09:23:25 | D | - Skipping Module transformer_blocks.15.norm1.linear +25-08-28 09:23:25 | D | - Skipping Module transformer_blocks.15.norm1_context.linear +25-08-28 09:23:25 | D | - Smoothing Transformer Block transformer_blocks.15 +25-08-28 09:23:25 | D | - transformer_blocks.15.attn.qkv_proj +25-08-28 09:23:25 | D | + w: sint4 +25-08-28 09:23:25 | D | + x: sint4 +25-08-28 09:23:25 | D | + y: None +25-08-28 09:23:25 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:23:25 | D | + finished parsing calibration arguments, ram usage: 17.7 +25-08-28 09:23:26 | D | + x - AbsMax +25-08-28 09:23:26 | D | + x = [min=0.1465, max=13.6250] +25-08-28 09:23:26 | D | + w - AbsMax +25-08-28 09:23:26 | D | + w = [min=0.1250, max=0.6250] +25-08-28 09:23:26 | D | + finished resetting calibrator, ram usage: 17.8 +25-08-28 09:23:27 | D | + finished calculating the original outputs, ram usage: 17.9 +25-08-28 09:24:39 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:24:39 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:24:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:24:39 | D | - sum error = [ 9941.3593, 9429.1251, 9059.0622, 8839.1384, 8770.8244] +25-08-28 09:24:39 | D | - best error = [ 9941.3593, 9429.1251, 9059.0622, 8839.1384, 8770.8244] +25-08-28 09:24:39 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:24:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:24:39 | D | - sum error = [ 8647.0679, 8667.7923, 8557.6997, 8592.7784, 8647.5578] +25-08-28 09:24:39 | D | - best error = [ 8647.0679, 8647.0679, 8557.6997, 8557.6997, 8557.6997] +25-08-28 09:24:39 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:24:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:24:39 | D | - sum error = [ 8535.3046, 8425.5108, 8547.2845, 8681.7160, 8732.1759] +25-08-28 09:24:39 | D | - best error = [ 8535.3046, 8425.5108, 8425.5108, 8425.5108, 8425.5108] +25-08-28 09:24:39 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:24:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:24:39 | D | - sum error = [ 8835.3182, 8936.6643, 9216.6258, 9341.6778, 9792.8382] +25-08-28 09:24:39 | D | - best error = [ 8425.5108, 8425.5108, 8425.5108, 8425.5108, 8425.5108] +25-08-28 09:24:39 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:24:39 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:24:39 | D | - sum error = [31491.2072, 27656.0949, 24641.4935, 21453.6874, 19117.1261] +25-08-28 09:24:39 | D | - best error = [ 8425.5108, 8425.5108, 8425.5108, 8425.5108, 8425.5108] +25-08-28 09:24:39 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:24:39 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:24:39 | D | - sum error = [17232.6629, 15539.9346, 14189.1553, 13217.8570, 12484.6132] +25-08-28 09:24:39 | D | - best error = [ 8425.5108, 8425.5108, 8425.5108, 8425.5108, 8425.5108] +25-08-28 09:24:39 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:24:39 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:24:39 | D | - sum error = [11630.7269, 11117.7357, 10569.4348, 10409.1990, 10288.2395] +25-08-28 09:24:39 | D | - best error = [ 8425.5108, 8425.5108, 8425.5108, 8425.5108, 8425.5108] +25-08-28 09:24:39 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:24:39 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:24:39 | D | - sum error = [10061.3391, 9938.5319, 9664.5277, 9704.4093] +25-08-28 09:24:39 | D | - best error = [ 8425.5108, 8425.5108, 8425.5108, 8425.5108] +25-08-28 09:24:39 | D | + error = 8425.5108 +25-08-28 09:24:39 | D | + scale = [min=0.3477, max=4.2062] +25-08-28 09:24:40 | D | - transformer_blocks.15.attn add_qkv_proj +25-08-28 09:24:40 | D | + w: sint4 +25-08-28 09:24:40 | D | + x: sint4 +25-08-28 09:24:40 | D | + y: None +25-08-28 09:24:40 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:24:40 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 09:24:40 | D | + x - AbsMax +25-08-28 09:24:40 | D | + x = [min=0.1279, max=22.8750] +25-08-28 09:24:40 | D | + w - AbsMax +25-08-28 09:24:40 | D | + w = [min=0.1094, max=0.5625] +25-08-28 09:24:40 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 09:24:41 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 09:25:43 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:25:43 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:25:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:25:43 | D | - sum error = [ 2335.3427, 2251.5544, 2161.6323, 2019.5283, 1961.5232] +25-08-28 09:25:43 | D | - best error = [ 2335.3427, 2251.5544, 2161.6323, 2019.5283, 1961.5232] +25-08-28 09:25:43 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:25:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:25:43 | D | - sum error = [ 1878.1276, 1810.6083, 1759.9974, 1720.5254, 1664.3452] +25-08-28 09:25:43 | D | - best error = [ 1878.1276, 1810.6083, 1759.9974, 1720.5254, 1664.3452] +25-08-28 09:25:43 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:25:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:25:43 | D | - sum error = [ 1655.3372, 1615.6287, 1612.5515, 1603.7025, 1634.6237] +25-08-28 09:25:43 | D | - best error = [ 1655.3372, 1615.6287, 1612.5515, 1603.7025, 1603.7025] +25-08-28 09:25:43 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:25:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:25:43 | D | - sum error = [ 1667.4879, 1723.4029, 1791.0098, 1876.1416, 2001.0262] +25-08-28 09:25:43 | D | - best error = [ 1603.7025, 1603.7025, 1603.7025, 1603.7025, 1603.7025] +25-08-28 09:25:43 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:25:43 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:25:43 | D | - sum error = [ 3262.8028, 3034.6581, 2799.3554, 2619.8815, 2463.6554] +25-08-28 09:25:43 | D | - best error = [ 1603.7025, 1603.7025, 1603.7025, 1603.7025, 1603.7025] +25-08-28 09:25:43 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:25:43 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:25:43 | D | - sum error = [ 2336.1721, 2195.0356, 2121.6802, 1995.3591, 1927.9356] +25-08-28 09:25:43 | D | - best error = [ 1603.7025, 1603.7025, 1603.7025, 1603.7025, 1603.7025] +25-08-28 09:25:43 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:25:43 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:25:43 | D | - sum error = [ 1837.8971, 1763.5267, 1741.5477, 1706.6294, 1709.7918] +25-08-28 09:25:43 | D | - best error = [ 1603.7025, 1603.7025, 1603.7025, 1603.7025, 1603.7025] +25-08-28 09:25:43 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:25:43 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:25:43 | D | - sum error = [ 1776.0678, 1793.5670, 1902.5380, 1990.2575] +25-08-28 09:25:43 | D | - best error = [ 1603.7025, 1603.7025, 1603.7025, 1603.7025] +25-08-28 09:25:43 | D | + error = 1603.7025 +25-08-28 09:25:43 | D | + scale = [min=0.2627, max=7.6486] +25-08-28 09:25:43 | D | - transformer_blocks.15.attn.out_proj + transformer_blocks.15.attn.add_out_proj +25-08-28 09:25:43 | D | + w: sint4 +25-08-28 09:25:43 | D | + x: sint4 +25-08-28 09:25:43 | D | + y: None +25-08-28 09:25:43 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:25:43 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 09:25:44 | D | + x - AbsMax +25-08-28 09:25:44 | D | + x = [min=3.2969, max=13.9375] +25-08-28 09:25:44 | D | + w - AbsMax +25-08-28 09:25:44 | D | + w = [min=0.1299, max=0.3633] +25-08-28 09:25:44 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 09:25:45 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 09:27:30 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:27:30 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:27:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:27:30 | D | - sum error = [ 8884.2787, 8875.9243, 8856.3631, 8848.9489, 8843.8245] +25-08-28 09:27:30 | D | - best error = [ 8884.2787, 8875.9243, 8856.3631, 8848.9489, 8843.8245] +25-08-28 09:27:30 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:27:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:27:30 | D | - sum error = [ 8865.8107, 8846.1711, 8851.3994, 8884.5416, 8890.7786] +25-08-28 09:27:30 | D | - best error = [ 8843.8245, 8843.8245, 8843.8245, 8843.8245, 8843.8245] +25-08-28 09:27:30 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:27:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:27:30 | D | - sum error = [ 8910.2507, 8948.7197, 9011.4105, 9019.7672, 9075.1569] +25-08-28 09:27:30 | D | - best error = [ 8843.8245, 8843.8245, 8843.8245, 8843.8245, 8843.8245] +25-08-28 09:27:30 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:27:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:27:30 | D | - sum error = [ 9118.3209, 9166.8206, 9226.0314, 9299.6335, 9361.9648] +25-08-28 09:27:30 | D | - best error = [ 8843.8245, 8843.8245, 8843.8245, 8843.8245, 8843.8245] +25-08-28 09:27:30 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:27:30 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:27:30 | D | - sum error = [ 9280.3125, 9210.4371, 9165.5817, 9148.9816, 9097.8112] +25-08-28 09:27:30 | D | - best error = [ 8843.8245, 8843.8245, 8843.8245, 8843.8245, 8843.8245] +25-08-28 09:27:30 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:27:30 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:27:30 | D | - sum error = [ 9087.4128, 9039.9628, 9048.9301, 9023.2819, 9048.6874] +25-08-28 09:27:30 | D | - best error = [ 8843.8245, 8843.8245, 8843.8245, 8843.8245, 8843.8245] +25-08-28 09:27:30 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:27:30 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:27:30 | D | - sum error = [ 9049.7903, 9042.4674, 9089.8660, 9133.7988, 9160.4899] +25-08-28 09:27:30 | D | - best error = [ 8843.8245, 8843.8245, 8843.8245, 8843.8245, 8843.8245] +25-08-28 09:27:30 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:27:30 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:27:30 | D | - sum error = [ 9184.2696, 9230.6052, 9304.1900, 9362.3048] +25-08-28 09:27:30 | D | - best error = [ 8843.8245, 8843.8245, 8843.8245, 8843.8245] +25-08-28 09:27:30 | D | + error = 8843.8245 +25-08-28 09:27:30 | D | + scale = [min=1.2695, max=1.6937] +25-08-28 09:27:30 | D | - transformer_blocks.15.ff.up_proj +25-08-28 09:27:30 | D | + w: sint4 +25-08-28 09:27:30 | D | + x: sint4 +25-08-28 09:27:30 | D | + y: None +25-08-28 09:27:30 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:27:30 | D | + finished parsing calibration arguments, ram usage: 18.9 +25-08-28 09:27:31 | D | + x - AbsMax +25-08-28 09:27:31 | D | + x = [min=0.2256, max=5.5938] +25-08-28 09:27:31 | D | + w - AbsMax +25-08-28 09:27:31 | D | + w = [min=0.1030, max=0.4551] +25-08-28 09:27:31 | D | + finished resetting calibrator, ram usage: 18.9 +25-08-28 09:27:32 | D | + finished calculating the original outputs, ram usage: 18.9 +25-08-28 09:28:47 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:28:47 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:28:47 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:28:47 | D | - sum error = [10368.7480, 10205.0682, 10062.7630, 9925.5541, 9810.5070] +25-08-28 09:28:47 | D | - best error = [10368.7480, 10205.0682, 10062.7630, 9925.5541, 9810.5070] +25-08-28 09:28:47 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:28:47 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:28:47 | D | - sum error = [ 9726.2120, 9647.6766, 9581.8361, 9542.4223, 9514.8209] +25-08-28 09:28:47 | D | - best error = [ 9726.2120, 9647.6766, 9581.8361, 9542.4223, 9514.8209] +25-08-28 09:28:47 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:28:47 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:28:47 | D | - sum error = [ 9494.8820, 9502.2216, 9524.7576, 9561.2865, 9613.7697] +25-08-28 09:28:47 | D | - best error = [ 9494.8820, 9494.8820, 9494.8820, 9494.8820, 9494.8820] +25-08-28 09:28:47 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:28:47 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:28:47 | D | - sum error = [ 9678.8152, 9764.5342, 9868.1480, 9978.8528, 10122.9956] +25-08-28 09:28:47 | D | - best error = [ 9494.8820, 9494.8820, 9494.8820, 9494.8820, 9494.8820] +25-08-28 09:28:47 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:28:47 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:28:47 | D | - sum error = [11384.4610, 11068.4832, 10786.1418, 10539.8342, 10325.7767] +25-08-28 09:28:47 | D | - best error = [ 9494.8820, 9494.8820, 9494.8820, 9494.8820, 9494.8820] +25-08-28 09:28:47 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:28:47 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:28:47 | D | - sum error = [10143.4109, 9981.2536, 9858.6429, 9756.6289, 9673.9758] +25-08-28 09:28:47 | D | - best error = [ 9494.8820, 9494.8820, 9494.8820, 9494.8820, 9494.8820] +25-08-28 09:28:47 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:28:47 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:28:47 | D | - sum error = [ 9625.5576, 9600.4758, 9600.8825, 9620.4549, 9666.7111] +25-08-28 09:28:47 | D | - best error = [ 9494.8820, 9494.8820, 9494.8820, 9494.8820, 9494.8820] +25-08-28 09:28:47 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:28:47 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:28:47 | D | - sum error = [ 9739.3901, 9831.7486, 9959.1229, 10106.9786] +25-08-28 09:28:47 | D | - best error = [ 9494.8820, 9494.8820, 9494.8820, 9494.8820] +25-08-28 09:28:47 | D | + error = 9494.8820 +25-08-28 09:28:47 | D | + scale = [min=0.4750, max=2.3651] +25-08-28 09:28:47 | D | - transformer_blocks.15.ff.down_proj +25-08-28 09:28:47 | D | + w: sint4 +25-08-28 09:28:47 | D | + x: uint4 +25-08-28 09:28:47 | D | + y: None +25-08-28 09:28:47 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:28:47 | D | + finished parsing calibration arguments, ram usage: 18.5 +25-08-28 09:28:49 | D | + x - AbsMax +25-08-28 09:28:49 | D | + x = [min=0.1719, max=25.2500] +25-08-28 09:28:49 | D | + w - AbsMax +25-08-28 09:28:49 | D | + w = [min=0.0684, max=0.8633] +25-08-28 09:28:49 | D | + finished resetting calibrator, ram usage: 18.5 +25-08-28 09:28:50 | D | + finished calculating the original outputs, ram usage: 18.5 +25-08-28 09:30:29 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:30:29 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:30:29 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:30:29 | D | - sum error = [ 8455.3372, 8442.8299, 8478.5088, 8492.9109, 8520.2479] +25-08-28 09:30:29 | D | - best error = [ 8455.3372, 8442.8299, 8442.8299, 8442.8299, 8442.8299] +25-08-28 09:30:29 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:30:29 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:30:29 | D | - sum error = [ 8556.0608, 8660.3746, 8718.5549, 8803.7456, 8887.8803] +25-08-28 09:30:29 | D | - best error = [ 8442.8299, 8442.8299, 8442.8299, 8442.8299, 8442.8299] +25-08-28 09:30:29 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:30:29 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:30:29 | D | - sum error = [ 8998.0745, 9138.3438, 9277.0771, 9469.5356, 9688.6086] +25-08-28 09:30:29 | D | - best error = [ 8442.8299, 8442.8299, 8442.8299, 8442.8299, 8442.8299] +25-08-28 09:30:29 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:30:29 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:30:29 | D | - sum error = [ 9914.6720, 10236.6670, 10741.3865, 11403.7054, 12077.5521] +25-08-28 09:30:29 | D | - best error = [ 8442.8299, 8442.8299, 8442.8299, 8442.8299, 8442.8299] +25-08-28 09:30:29 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:30:29 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:30:29 | D | - sum error = [ 9079.4943, 8925.6753, 8795.6853, 8712.9990, 8664.6283] +25-08-28 09:30:29 | D | - best error = [ 8442.8299, 8442.8299, 8442.8299, 8442.8299, 8442.8299] +25-08-28 09:30:29 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:30:29 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:30:29 | D | - sum error = [ 8624.8307, 8614.2273, 8599.6791, 8638.4499, 8673.7341] +25-08-28 09:30:29 | D | - best error = [ 8442.8299, 8442.8299, 8442.8299, 8442.8299, 8442.8299] +25-08-28 09:30:29 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:30:29 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:30:29 | D | - sum error = [ 8807.8133, 8967.2453, 9133.0941, 9308.6987, 9579.2421] +25-08-28 09:30:29 | D | - best error = [ 8442.8299, 8442.8299, 8442.8299, 8442.8299, 8442.8299] +25-08-28 09:30:29 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:30:29 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:30:29 | D | - sum error = [ 9935.5551, 10413.9303, 11124.8331, 11927.0731] +25-08-28 09:30:29 | D | - best error = [ 8442.8299, 8442.8299, 8442.8299, 8442.8299] +25-08-28 09:30:29 | D | + error = 8442.8299 +25-08-28 09:30:29 | D | + scale = [min=0.9157, max=1.1752] +25-08-28 09:30:29 | D | - transformer_blocks.15.ff_context.up_proj +25-08-28 09:30:29 | D | + w: sint4 +25-08-28 09:30:29 | D | + x: sint4 +25-08-28 09:30:29 | D | + y: None +25-08-28 09:30:29 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:30:29 | D | + finished parsing calibration arguments, ram usage: 18.5 +25-08-28 09:30:29 | D | + x - AbsMax +25-08-28 09:30:29 | D | + x = [min=0.1895, max=43.5000] +25-08-28 09:30:29 | D | + w - AbsMax +25-08-28 09:30:29 | D | + w = [min=0.1025, max=0.5391] +25-08-28 09:30:29 | D | + finished resetting calibrator, ram usage: 18.6 +25-08-28 09:30:30 | D | + finished calculating the original outputs, ram usage: 18.6 +25-08-28 09:31:13 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:31:13 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:31:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:31:13 | D | - sum error = [ 9577.8294, 9031.1576, 8502.4999, 8019.4965, 7582.8156] +25-08-28 09:31:13 | D | - best error = [ 9577.8294, 9031.1576, 8502.4999, 8019.4965, 7582.8156] +25-08-28 09:31:13 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:31:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:31:13 | D | - sum error = [ 7229.3137, 6902.5833, 6614.7305, 6386.6555, 6185.1571] +25-08-28 09:31:13 | D | - best error = [ 7229.3137, 6902.5833, 6614.7305, 6386.6555, 6185.1571] +25-08-28 09:31:13 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:31:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:31:13 | D | - sum error = [ 6031.4343, 5904.7567, 5821.8372, 5758.5290, 5702.8658] +25-08-28 09:31:13 | D | - best error = [ 6031.4343, 5904.7567, 5821.8372, 5758.5290, 5702.8658] +25-08-28 09:31:13 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:31:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:31:13 | D | - sum error = [ 5717.6201, 5774.7585, 5839.9425, 5965.4562, 6121.9089] +25-08-28 09:31:13 | D | - best error = [ 5702.8658, 5702.8658, 5702.8658, 5702.8658, 5702.8658] +25-08-28 09:31:13 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:31:13 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:31:13 | D | - sum error = [ 8705.1989, 8210.2599, 7744.2182, 7321.8864, 6943.6270] +25-08-28 09:31:13 | D | - best error = [ 5702.8658, 5702.8658, 5702.8658, 5702.8658, 5702.8658] +25-08-28 09:31:13 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:31:13 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:31:13 | D | - sum error = [ 6627.1327, 6353.7271, 6137.5445, 5973.2966, 5834.9800] +25-08-28 09:31:13 | D | - best error = [ 5702.8658, 5702.8658, 5702.8658, 5702.8658, 5702.8658] +25-08-28 09:31:13 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:31:13 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:31:13 | D | - sum error = [ 5724.0052, 5624.8207, 5593.4201, 5599.0012, 5638.0816] +25-08-28 09:31:13 | D | - best error = [ 5702.8658, 5624.8207, 5593.4201, 5593.4201, 5593.4201] +25-08-28 09:31:13 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:31:13 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:31:13 | D | - sum error = [ 5662.1741, 5776.6679, 5930.7922, 6084.3215] +25-08-28 09:31:13 | D | - best error = [ 5593.4201, 5593.4201, 5593.4201, 5593.4201] +25-08-28 09:31:13 | D | + error = 5593.4201 +25-08-28 09:31:13 | D | + scale = [min=0.6140, max=20.0258] +25-08-28 09:31:13 | D | - transformer_blocks.15.ff_context.down_proj +25-08-28 09:31:13 | D | + w: sint4 +25-08-28 09:31:13 | D | + x: uint4 +25-08-28 09:31:13 | D | + y: None +25-08-28 09:31:13 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:31:13 | D | + finished parsing calibration arguments, ram usage: 18.6 +25-08-28 09:31:14 | D | + x - AbsMax +25-08-28 09:31:14 | D | + x = [min=0.1719, max=73.5000] +25-08-28 09:31:14 | D | + w - AbsMax +25-08-28 09:31:14 | D | + w = [min=0.0496, max=0.4648] +25-08-28 09:31:14 | D | + finished resetting calibrator, ram usage: 18.6 +25-08-28 09:31:14 | D | + finished calculating the original outputs, ram usage: 18.6 +25-08-28 09:32:02 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:32:02 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:32:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:32:02 | D | - sum error = [ 2580.9514, 2468.5698, 2433.9663, 2438.7074, 2474.2083] +25-08-28 09:32:02 | D | - best error = [ 2580.9514, 2468.5698, 2433.9663, 2433.9663, 2433.9663] +25-08-28 09:32:02 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:32:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:32:02 | D | - sum error = [ 2551.3828, 2653.4405, 2880.2259, 3133.3268, 3508.0972] +25-08-28 09:32:02 | D | - best error = [ 2433.9663, 2433.9663, 2433.9663, 2433.9663, 2433.9663] +25-08-28 09:32:02 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:32:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:32:02 | D | - sum error = [ 4009.5755, 4431.2319, 5032.8038, 5670.6278, 6491.5357] +25-08-28 09:32:02 | D | - best error = [ 2433.9663, 2433.9663, 2433.9663, 2433.9663, 2433.9663] +25-08-28 09:32:02 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:32:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:32:02 | D | - sum error = [ 7305.2502, 8351.2845, 9563.3433, 10760.9345, 12987.6230] +25-08-28 09:32:02 | D | - best error = [ 2433.9663, 2433.9663, 2433.9663, 2433.9663, 2433.9663] +25-08-28 09:32:02 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:32:02 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:32:02 | D | - sum error = [ 2524.4928, 2485.6007, 2479.8545, 2484.7454, 2560.0975] +25-08-28 09:32:02 | D | - best error = [ 2433.9663, 2433.9663, 2433.9663, 2433.9663, 2433.9663] +25-08-28 09:32:02 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:32:02 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:32:02 | D | - sum error = [ 2704.8547, 2870.9202, 3123.3807, 3483.7270, 3911.7589] +25-08-28 09:32:02 | D | - best error = [ 2433.9663, 2433.9663, 2433.9663, 2433.9663, 2433.9663] +25-08-28 09:32:02 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:32:02 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:32:02 | D | - sum error = [ 4552.0009, 5082.1395, 5906.8110, 6707.9857, 7698.8334] +25-08-28 09:32:02 | D | - best error = [ 2433.9663, 2433.9663, 2433.9663, 2433.9663, 2433.9663] +25-08-28 09:32:02 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:32:02 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:32:02 | D | - sum error = [ 8664.1911, 9734.6758, 11009.8777, 13254.2268] +25-08-28 09:32:02 | D | - best error = [ 2433.9663, 2433.9663, 2433.9663, 2433.9663] +25-08-28 09:32:02 | D | + error = 2433.9663 +25-08-28 09:32:02 | D | + scale = [min=0.8385, max=1.5368] +25-08-28 09:32:20 | D | - Smoothing Diffusion Block transformer_blocks.16 +25-08-28 09:32:20 | D | - Skipping Module transformer_blocks.16.norm1.linear +25-08-28 09:32:20 | D | - Skipping Module transformer_blocks.16.norm1_context.linear +25-08-28 09:32:20 | D | - Smoothing Transformer Block transformer_blocks.16 +25-08-28 09:32:20 | D | - transformer_blocks.16.attn.qkv_proj +25-08-28 09:32:20 | D | + w: sint4 +25-08-28 09:32:20 | D | + x: sint4 +25-08-28 09:32:20 | D | + y: None +25-08-28 09:32:20 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:32:20 | D | + finished parsing calibration arguments, ram usage: 18.6 +25-08-28 09:32:20 | D | + x - AbsMax +25-08-28 09:32:20 | D | + x = [min=0.1855, max=22.1250] +25-08-28 09:32:20 | D | + w - AbsMax +25-08-28 09:32:20 | D | + w = [min=0.1069, max=0.6836] +25-08-28 09:32:20 | D | + finished resetting calibrator, ram usage: 18.6 +25-08-28 09:32:21 | D | + finished calculating the original outputs, ram usage: 18.6 +25-08-28 09:33:33 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:33:33 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:33:33 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:33:33 | D | - sum error = [10354.4263, 9918.9286, 9596.1847, 9294.3729, 9001.1946] +25-08-28 09:33:33 | D | - best error = [10354.4263, 9918.9286, 9596.1847, 9294.3729, 9001.1946] +25-08-28 09:33:33 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:33:33 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:33:33 | D | - sum error = [ 8804.7201, 8670.0012, 8685.5977, 8473.7689, 8423.3936] +25-08-28 09:33:33 | D | - best error = [ 8804.7201, 8670.0012, 8670.0012, 8473.7689, 8423.3936] +25-08-28 09:33:33 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:33:33 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:33:33 | D | - sum error = [ 8484.4597, 8450.2175, 8664.5642, 8623.0375, 8682.6729] +25-08-28 09:33:33 | D | - best error = [ 8423.3936, 8423.3936, 8423.3936, 8423.3936, 8423.3936] +25-08-28 09:33:33 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:33:33 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:33:33 | D | - sum error = [ 8801.2067, 8906.6501, 9058.2855, 9197.5545, 9293.1786] +25-08-28 09:33:33 | D | - best error = [ 8423.3936, 8423.3936, 8423.3936, 8423.3936, 8423.3936] +25-08-28 09:33:33 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:33:33 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:33:33 | D | - sum error = [29636.5105, 25914.6439, 23295.4135, 20939.8094, 18842.8669] +25-08-28 09:33:33 | D | - best error = [ 8423.3936, 8423.3936, 8423.3936, 8423.3936, 8423.3936] +25-08-28 09:33:33 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:33:33 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:33:33 | D | - sum error = [17188.6977, 15617.5220, 14354.6703, 13146.0230, 12171.5984] +25-08-28 09:33:33 | D | - best error = [ 8423.3936, 8423.3936, 8423.3936, 8423.3936, 8423.3936] +25-08-28 09:33:33 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:33:33 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:33:33 | D | - sum error = [11336.7713, 10679.2097, 10308.5638, 9958.5212, 10020.2838] +25-08-28 09:33:33 | D | - best error = [ 8423.3936, 8423.3936, 8423.3936, 8423.3936, 8423.3936] +25-08-28 09:33:33 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:33:33 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:33:33 | D | - sum error = [ 9811.9340, 9608.9383, 9380.8435, 9410.6191] +25-08-28 09:33:33 | D | - best error = [ 8423.3936, 8423.3936, 8423.3936, 8423.3936] +25-08-28 09:33:33 | D | + error = 8423.3936 +25-08-28 09:33:33 | D | + scale = [min=0.4686, max=4.0290] +25-08-28 09:33:34 | D | - transformer_blocks.16.attn add_qkv_proj +25-08-28 09:33:34 | D | + w: sint4 +25-08-28 09:33:34 | D | + x: sint4 +25-08-28 09:33:34 | D | + y: None +25-08-28 09:33:34 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:33:34 | D | + finished parsing calibration arguments, ram usage: 18.6 +25-08-28 09:33:34 | D | + x - AbsMax +25-08-28 09:33:34 | D | + x = [min=0.2080, max=25.7500] +25-08-28 09:33:34 | D | + w - AbsMax +25-08-28 09:33:34 | D | + w = [min=0.1079, max=0.5859] +25-08-28 09:33:34 | D | + finished resetting calibrator, ram usage: 18.6 +25-08-28 09:33:35 | D | + finished calculating the original outputs, ram usage: 18.6 +25-08-28 09:34:37 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:34:37 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:34:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:34:37 | D | - sum error = [ 2567.8084, 2469.3731, 2350.8605, 2254.7679, 2164.3805] +25-08-28 09:34:37 | D | - best error = [ 2567.8084, 2469.3731, 2350.8605, 2254.7679, 2164.3805] +25-08-28 09:34:37 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:34:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:34:37 | D | - sum error = [ 2067.7426, 2021.0108, 1965.8653, 1917.1569, 1870.2013] +25-08-28 09:34:37 | D | - best error = [ 2067.7426, 2021.0108, 1965.8653, 1917.1569, 1870.2013] +25-08-28 09:34:37 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:34:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:34:37 | D | - sum error = [ 1835.5586, 1837.5250, 1849.9911, 1870.6282, 1929.9866] +25-08-28 09:34:37 | D | - best error = [ 1835.5586, 1835.5586, 1835.5586, 1835.5586, 1835.5586] +25-08-28 09:34:37 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:34:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:34:37 | D | - sum error = [ 1947.0650, 2034.2322, 2071.4974, 2168.8997, 2329.9183] +25-08-28 09:34:37 | D | - best error = [ 1835.5586, 1835.5586, 1835.5586, 1835.5586, 1835.5586] +25-08-28 09:34:37 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:34:37 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:34:37 | D | - sum error = [ 3463.7058, 3320.6608, 3176.1990, 2981.1605, 2840.6023] +25-08-28 09:34:37 | D | - best error = [ 1835.5586, 1835.5586, 1835.5586, 1835.5586, 1835.5586] +25-08-28 09:34:37 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:34:37 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:34:37 | D | - sum error = [ 2718.5379, 2515.2619, 2380.5964, 2269.3583, 2183.6143] +25-08-28 09:34:37 | D | - best error = [ 1835.5586, 1835.5586, 1835.5586, 1835.5586, 1835.5586] +25-08-28 09:34:37 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:34:37 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:34:37 | D | - sum error = [ 2111.7424, 2066.8408, 2040.3751, 2020.7661, 2035.6805] +25-08-28 09:34:37 | D | - best error = [ 1835.5586, 1835.5586, 1835.5586, 1835.5586, 1835.5586] +25-08-28 09:34:37 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:34:37 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:34:37 | D | - sum error = [ 2044.9326, 2130.1449, 2198.0622, 2315.2851] +25-08-28 09:34:37 | D | - best error = [ 1835.5586, 1835.5586, 1835.5586, 1835.5586] +25-08-28 09:34:37 | D | + error = 1835.5586 +25-08-28 09:34:37 | D | + scale = [min=0.4561, max=5.0744] +25-08-28 09:34:37 | D | - transformer_blocks.16.attn.out_proj + transformer_blocks.16.attn.add_out_proj +25-08-28 09:34:37 | D | + w: sint4 +25-08-28 09:34:37 | D | + x: sint4 +25-08-28 09:34:37 | D | + y: None +25-08-28 09:34:37 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:34:37 | D | + finished parsing calibration arguments, ram usage: 18.5 +25-08-28 09:34:37 | D | + x - AbsMax +25-08-28 09:34:37 | D | + x = [min=3.8281, max=20.6250] +25-08-28 09:34:37 | D | + w - AbsMax +25-08-28 09:34:37 | D | + w = [min=0.1289, max=0.3145] +25-08-28 09:34:37 | D | + finished resetting calibrator, ram usage: 18.6 +25-08-28 09:34:38 | D | + finished calculating the original outputs, ram usage: 18.5 +25-08-28 09:36:22 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:36:22 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:36:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:36:22 | D | - sum error = [10429.1971, 10407.4116, 10391.3214, 10350.6066, 10346.5948] +25-08-28 09:36:22 | D | - best error = [10429.1971, 10407.4116, 10391.3214, 10350.6066, 10346.5948] +25-08-28 09:36:22 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:36:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:36:22 | D | - sum error = [10366.5817, 10372.5102, 10355.6929, 10363.3666, 10372.5615] +25-08-28 09:36:22 | D | - best error = [10346.5948, 10346.5948, 10346.5948, 10346.5948, 10346.5948] +25-08-28 09:36:22 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:36:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:36:22 | D | - sum error = [10409.4595, 10474.5814, 10482.5226, 10561.6855, 10596.2084] +25-08-28 09:36:22 | D | - best error = [10346.5948, 10346.5948, 10346.5948, 10346.5948, 10346.5948] +25-08-28 09:36:22 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:36:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:36:22 | D | - sum error = [10654.2904, 10708.6021, 10781.2675, 10867.2782, 10950.8286] +25-08-28 09:36:22 | D | - best error = [10346.5948, 10346.5948, 10346.5948, 10346.5948, 10346.5948] +25-08-28 09:36:22 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:36:22 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:36:22 | D | - sum error = [11032.2379, 10942.0247, 10884.3197, 10792.2880, 10690.9835] +25-08-28 09:36:22 | D | - best error = [10346.5948, 10346.5948, 10346.5948, 10346.5948, 10346.5948] +25-08-28 09:36:22 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:36:22 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:36:22 | D | - sum error = [10643.5404, 10598.1508, 10568.6642, 10557.1648, 10546.5991] +25-08-28 09:36:22 | D | - best error = [10346.5948, 10346.5948, 10346.5948, 10346.5948, 10346.5948] +25-08-28 09:36:22 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:36:22 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:36:22 | D | - sum error = [10566.8807, 10595.9434, 10611.8413, 10649.5206, 10690.6337] +25-08-28 09:36:22 | D | - best error = [10346.5948, 10346.5948, 10346.5948, 10346.5948, 10346.5948] +25-08-28 09:36:22 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:36:22 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:36:22 | D | - sum error = [10728.7722, 10791.1138, 10852.0992, 10948.2481] +25-08-28 09:36:22 | D | - best error = [10346.5948, 10346.5948, 10346.5948, 10346.5948] +25-08-28 09:36:22 | D | + error = 10346.5948 +25-08-28 09:36:22 | D | + scale = [min=1.3080, max=1.8318] +25-08-28 09:36:22 | D | - transformer_blocks.16.ff.up_proj +25-08-28 09:36:22 | D | + w: sint4 +25-08-28 09:36:22 | D | + x: sint4 +25-08-28 09:36:22 | D | + y: None +25-08-28 09:36:22 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:36:22 | D | + finished parsing calibration arguments, ram usage: 18.5 +25-08-28 09:36:23 | D | + x - AbsMax +25-08-28 09:36:23 | D | + x = [min=0.1777, max=6.4688] +25-08-28 09:36:23 | D | + w - AbsMax +25-08-28 09:36:23 | D | + w = [min=0.1094, max=0.6680] +25-08-28 09:36:23 | D | + finished resetting calibrator, ram usage: 18.6 +25-08-28 09:36:24 | D | + finished calculating the original outputs, ram usage: 18.6 +25-08-28 09:37:40 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:37:40 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:37:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:37:40 | D | - sum error = [10799.4567, 10549.9103, 10313.4743, 10109.3897, 9923.0056] +25-08-28 09:37:40 | D | - best error = [10799.4567, 10549.9103, 10313.4743, 10109.3897, 9923.0056] +25-08-28 09:37:40 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:37:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:37:40 | D | - sum error = [ 9759.7332, 9619.1318, 9494.9330, 9390.5940, 9309.6397] +25-08-28 09:37:40 | D | - best error = [ 9759.7332, 9619.1318, 9494.9330, 9390.5940, 9309.6397] +25-08-28 09:37:40 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:37:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:37:40 | D | - sum error = [ 9249.0791, 9204.0265, 9174.0368, 9173.0793, 9179.5982] +25-08-28 09:37:40 | D | - best error = [ 9249.0791, 9204.0265, 9174.0368, 9173.0793, 9173.0793] +25-08-28 09:37:40 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:37:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:37:40 | D | - sum error = [ 9209.3943, 9255.8302, 9321.3244, 9405.7731, 9507.8044] +25-08-28 09:37:40 | D | - best error = [ 9173.0793, 9173.0793, 9173.0793, 9173.0793, 9173.0793] +25-08-28 09:37:40 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:37:40 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:37:40 | D | - sum error = [11158.1153, 10836.3942, 10550.8908, 10291.3288, 10065.1466] +25-08-28 09:37:40 | D | - best error = [ 9173.0793, 9173.0793, 9173.0793, 9173.0793, 9173.0793] +25-08-28 09:37:40 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:37:40 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:37:40 | D | - sum error = [ 9867.1548, 9695.3671, 9549.1084, 9431.1102, 9332.9189] +25-08-28 09:37:40 | D | - best error = [ 9173.0793, 9173.0793, 9173.0793, 9173.0793, 9173.0793] +25-08-28 09:37:40 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:37:40 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:37:40 | D | - sum error = [ 9266.5588, 9221.7551, 9194.2099, 9183.1951, 9200.5713] +25-08-28 09:37:40 | D | - best error = [ 9173.0793, 9173.0793, 9173.0793, 9173.0793, 9173.0793] +25-08-28 09:37:40 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:37:40 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:37:40 | D | - sum error = [ 9244.0056, 9310.5252, 9391.1976, 9497.8010] +25-08-28 09:37:40 | D | - best error = [ 9173.0793, 9173.0793, 9173.0793, 9173.0793] +25-08-28 09:37:40 | D | + error = 9173.0793 +25-08-28 09:37:40 | D | + scale = [min=0.3254, max=3.3654] +25-08-28 09:37:40 | D | - transformer_blocks.16.ff.down_proj +25-08-28 09:37:40 | D | + w: sint4 +25-08-28 09:37:40 | D | + x: uint4 +25-08-28 09:37:40 | D | + y: None +25-08-28 09:37:40 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:37:40 | D | + finished parsing calibration arguments, ram usage: 18.5 +25-08-28 09:37:42 | D | + x - AbsMax +25-08-28 09:37:42 | D | + x = [min=0.1719, max=31.5000] +25-08-28 09:37:42 | D | + w - AbsMax +25-08-28 09:37:42 | D | + w = [min=0.0684, max=1.0547] +25-08-28 09:37:42 | D | + finished resetting calibrator, ram usage: 18.5 +25-08-28 09:37:45 | D | + finished calculating the original outputs, ram usage: 18.5 +25-08-28 09:39:38 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:39:38 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:39:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:39:38 | D | - sum error = [ 8368.3016, 8409.2330, 8404.2951, 8454.3972, 8509.8954] +25-08-28 09:39:38 | D | - best error = [ 8368.3016, 8368.3016, 8368.3016, 8368.3016, 8368.3016] +25-08-28 09:39:38 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:39:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:39:38 | D | - sum error = [ 8600.1307, 8662.1651, 8742.0345, 8783.7122, 8905.2820] +25-08-28 09:39:38 | D | - best error = [ 8368.3016, 8368.3016, 8368.3016, 8368.3016, 8368.3016] +25-08-28 09:39:38 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:39:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:39:38 | D | - sum error = [ 9017.3903, 9144.3228, 9301.1724, 9504.6237, 9655.5103] +25-08-28 09:39:38 | D | - best error = [ 8368.3016, 8368.3016, 8368.3016, 8368.3016, 8368.3016] +25-08-28 09:39:38 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:39:38 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:39:38 | D | - sum error = [ 9847.2563, 10064.4912, 10363.2188, 10737.3721, 11113.9509] +25-08-28 09:39:38 | D | - best error = [ 8368.3016, 8368.3016, 8368.3016, 8368.3016, 8368.3016] +25-08-28 09:39:38 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:39:38 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:39:38 | D | - sum error = [ 8946.8251, 8769.9824, 8656.3530, 8563.7305, 8513.7883] +25-08-28 09:39:38 | D | - best error = [ 8368.3016, 8368.3016, 8368.3016, 8368.3016, 8368.3016] +25-08-28 09:39:38 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:39:38 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:39:38 | D | - sum error = [ 8480.2274, 8450.3340, 8482.2242, 8541.3161, 8637.8085] +25-08-28 09:39:38 | D | - best error = [ 8368.3016, 8368.3016, 8368.3016, 8368.3016, 8368.3016] +25-08-28 09:39:38 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:39:38 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:39:38 | D | - sum error = [ 8710.8291, 8837.2693, 9018.2766, 9199.3887, 9448.3446] +25-08-28 09:39:38 | D | - best error = [ 8368.3016, 8368.3016, 8368.3016, 8368.3016, 8368.3016] +25-08-28 09:39:38 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:39:38 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:39:38 | D | - sum error = [ 9718.4600, 10080.3063, 10484.3159, 10984.8954] +25-08-28 09:39:38 | D | - best error = [ 8368.3016, 8368.3016, 8368.3016, 8368.3016] +25-08-28 09:39:38 | D | + error = 8368.3016 +25-08-28 09:39:38 | D | + scale = [min=1.0000, max=1.0000] +25-08-28 09:39:39 | D | - transformer_blocks.16.ff_context.up_proj +25-08-28 09:39:39 | D | + w: sint4 +25-08-28 09:39:39 | D | + x: sint4 +25-08-28 09:39:39 | D | + y: None +25-08-28 09:39:39 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:39:39 | D | + finished parsing calibration arguments, ram usage: 18.5 +25-08-28 09:39:39 | D | + x - AbsMax +25-08-28 09:39:39 | D | + x = [min=0.2480, max=39.5000] +25-08-28 09:39:39 | D | + w - AbsMax +25-08-28 09:39:39 | D | + w = [min=0.1152, max=1.0391] +25-08-28 09:39:39 | D | + finished resetting calibrator, ram usage: 18.6 +25-08-28 09:39:39 | D | + finished calculating the original outputs, ram usage: 18.6 +25-08-28 09:40:23 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:40:23 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:40:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:40:23 | D | - sum error = [10369.7190, 9867.7304, 9351.5937, 8947.0415, 8503.2685] +25-08-28 09:40:23 | D | - best error = [10369.7190, 9867.7304, 9351.5937, 8947.0415, 8503.2685] +25-08-28 09:40:23 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:40:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:40:23 | D | - sum error = [ 8151.4872, 7818.1028, 7517.1580, 7289.1904, 7079.9619] +25-08-28 09:40:23 | D | - best error = [ 8151.4872, 7818.1028, 7517.1580, 7289.1904, 7079.9619] +25-08-28 09:40:23 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:40:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:40:23 | D | - sum error = [ 6922.4189, 6793.0921, 6678.2746, 6611.9985, 6571.7276] +25-08-28 09:40:23 | D | - best error = [ 6922.4189, 6793.0921, 6678.2746, 6611.9985, 6571.7276] +25-08-28 09:40:23 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:40:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:40:23 | D | - sum error = [ 6581.6624, 6603.9491, 6668.1668, 6762.7273, 6914.7315] +25-08-28 09:40:23 | D | - best error = [ 6571.7276, 6571.7276, 6571.7276, 6571.7276, 6571.7276] +25-08-28 09:40:23 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:40:23 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:40:23 | D | - sum error = [10297.0815, 9729.4686, 9203.7662, 8734.6780, 8317.1293] +25-08-28 09:40:23 | D | - best error = [ 6571.7276, 6571.7276, 6571.7276, 6571.7276, 6571.7276] +25-08-28 09:40:23 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:40:23 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:40:23 | D | - sum error = [ 7926.2049, 7582.8543, 7299.4104, 7028.9743, 6854.6297] +25-08-28 09:40:23 | D | - best error = [ 6571.7276, 6571.7276, 6571.7276, 6571.7276, 6571.7276] +25-08-28 09:40:23 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:40:23 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:40:23 | D | - sum error = [ 6654.4433, 6543.2228, 6485.9414, 6445.0356, 6448.7866] +25-08-28 09:40:23 | D | - best error = [ 6571.7276, 6543.2228, 6485.9414, 6445.0356, 6445.0356] +25-08-28 09:40:23 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:40:23 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:40:23 | D | - sum error = [ 6488.2623, 6576.8010, 6742.2308, 6887.1533] +25-08-28 09:40:23 | D | - best error = [ 6445.0356, 6445.0356, 6445.0356, 6445.0356] +25-08-28 09:40:23 | D | + error = 6445.0356 +25-08-28 09:40:23 | D | + scale = [min=0.6028, max=17.7642] +25-08-28 09:40:23 | D | - transformer_blocks.16.ff_context.down_proj +25-08-28 09:40:23 | D | + w: sint4 +25-08-28 09:40:23 | D | + x: uint4 +25-08-28 09:40:23 | D | + y: None +25-08-28 09:40:23 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:40:23 | D | + finished parsing calibration arguments, ram usage: 18.6 +25-08-28 09:40:23 | D | + x - AbsMax +25-08-28 09:40:23 | D | + x = [min=0.1719, max=83.0000] +25-08-28 09:40:23 | D | + w - AbsMax +25-08-28 09:40:23 | D | + w = [min=0.0320, max=0.7188] +25-08-28 09:40:23 | D | + finished resetting calibrator, ram usage: 18.6 +25-08-28 09:40:24 | D | + finished calculating the original outputs, ram usage: 18.5 +25-08-28 09:41:12 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:41:12 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:41:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:41:12 | D | - sum error = [ 2831.4783, 2681.2610, 2584.4772, 2588.5789, 2650.2517] +25-08-28 09:41:12 | D | - best error = [ 2831.4783, 2681.2610, 2584.4772, 2584.4772, 2584.4772] +25-08-28 09:41:12 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:41:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:41:12 | D | - sum error = [ 2829.1115, 3031.0219, 3440.3119, 3972.2375, 4620.0680] +25-08-28 09:41:12 | D | - best error = [ 2584.4772, 2584.4772, 2584.4772, 2584.4772, 2584.4772] +25-08-28 09:41:12 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:41:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:41:12 | D | - sum error = [ 5469.5119, 6494.6021, 7557.8060, 9107.6460, 10819.3153] +25-08-28 09:41:12 | D | - best error = [ 2584.4772, 2584.4772, 2584.4772, 2584.4772, 2584.4772] +25-08-28 09:41:12 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:41:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:41:12 | D | - sum error = [12748.0697, 14540.1678, 15994.1209, 17570.7566, 20035.5205] +25-08-28 09:41:12 | D | - best error = [ 2584.4772, 2584.4772, 2584.4772, 2584.4772, 2584.4772] +25-08-28 09:41:12 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:41:12 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:41:12 | D | - sum error = [ 2755.0107, 2672.6128, 2669.8860, 2731.6283, 2850.9953] +25-08-28 09:41:12 | D | - best error = [ 2584.4772, 2584.4772, 2584.4772, 2584.4772, 2584.4772] +25-08-28 09:41:12 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:41:12 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:41:12 | D | - sum error = [ 3055.6682, 3380.6035, 3793.6202, 4392.5771, 5258.1881] +25-08-28 09:41:12 | D | - best error = [ 2584.4772, 2584.4772, 2584.4772, 2584.4772, 2584.4772] +25-08-28 09:41:12 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:41:12 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:41:12 | D | - sum error = [ 6498.9073, 7889.3997, 9666.1066, 11493.3598, 13527.1709] +25-08-28 09:41:12 | D | - best error = [ 2584.4772, 2584.4772, 2584.4772, 2584.4772, 2584.4772] +25-08-28 09:41:12 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:41:12 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:41:12 | D | - sum error = [15091.6162, 16533.4222, 18237.4560, 20453.9976] +25-08-28 09:41:12 | D | - best error = [ 2584.4772, 2584.4772, 2584.4772, 2584.4772] +25-08-28 09:41:12 | D | + error = 2584.4772 +25-08-28 09:41:12 | D | + scale = [min=0.8385, max=1.5556] +25-08-28 09:41:31 | D | - Smoothing Diffusion Block transformer_blocks.17 +25-08-28 09:41:31 | D | - Skipping Module transformer_blocks.17.norm1.linear +25-08-28 09:41:31 | D | - Skipping Module transformer_blocks.17.norm1_context.linear +25-08-28 09:41:31 | D | - Smoothing Transformer Block transformer_blocks.17 +25-08-28 09:41:31 | D | - transformer_blocks.17.attn.qkv_proj +25-08-28 09:41:31 | D | + w: sint4 +25-08-28 09:41:31 | D | + x: sint4 +25-08-28 09:41:31 | D | + y: None +25-08-28 09:41:31 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:41:31 | D | + finished parsing calibration arguments, ram usage: 18.5 +25-08-28 09:41:31 | D | + x - AbsMax +25-08-28 09:41:31 | D | + x = [min=0.2637, max=19.2500] +25-08-28 09:41:31 | D | + w - AbsMax +25-08-28 09:41:31 | D | + w = [min=0.1108, max=0.8633] +25-08-28 09:41:31 | D | + finished resetting calibrator, ram usage: 18.6 +25-08-28 09:41:33 | D | + finished calculating the original outputs, ram usage: 18.6 +25-08-28 09:42:51 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:42:51 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:42:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:42:51 | D | - sum error = [10501.5519, 10180.3560, 9985.2371, 9821.5433, 9648.1119] +25-08-28 09:42:51 | D | - best error = [10501.5519, 10180.3560, 9985.2371, 9821.5433, 9648.1119] +25-08-28 09:42:51 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:42:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:42:51 | D | - sum error = [ 9513.2236, 9465.5603, 9432.8045, 9452.4943, 9441.9201] +25-08-28 09:42:51 | D | - best error = [ 9513.2236, 9465.5603, 9432.8045, 9432.8045, 9432.8045] +25-08-28 09:42:51 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:42:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:42:51 | D | - sum error = [ 9316.1597, 9435.0042, 9527.0940, 9541.2024, 9594.5309] +25-08-28 09:42:51 | D | - best error = [ 9316.1597, 9316.1597, 9316.1597, 9316.1597, 9316.1597] +25-08-28 09:42:51 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:42:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:42:51 | D | - sum error = [ 9622.1810, 9782.3355, 9807.4090, 9866.4283, 10084.0595] +25-08-28 09:42:51 | D | - best error = [ 9316.1597, 9316.1597, 9316.1597, 9316.1597, 9316.1597] +25-08-28 09:42:51 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:42:51 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:42:51 | D | - sum error = [38140.5435, 34337.8173, 30537.7500, 27325.7610, 24782.1135] +25-08-28 09:42:51 | D | - best error = [ 9316.1597, 9316.1597, 9316.1597, 9316.1597, 9316.1597] +25-08-28 09:42:51 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:42:51 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:42:51 | D | - sum error = [22235.2146, 19855.2401, 18128.6990, 16314.1225, 14936.0059] +25-08-28 09:42:51 | D | - best error = [ 9316.1597, 9316.1597, 9316.1597, 9316.1597, 9316.1597] +25-08-28 09:42:51 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:42:51 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:42:51 | D | - sum error = [13879.0576, 12911.3234, 12526.5842, 12007.7775, 11414.6937] +25-08-28 09:42:51 | D | - best error = [ 9316.1597, 9316.1597, 9316.1597, 9316.1597, 9316.1597] +25-08-28 09:42:51 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:42:51 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:42:51 | D | - sum error = [10995.5740, 10882.0127, 10622.0616, 10301.9509] +25-08-28 09:42:51 | D | - best error = [ 9316.1597, 9316.1597, 9316.1597, 9316.1597] +25-08-28 09:42:51 | D | + error = 9316.1597 +25-08-28 09:42:51 | D | + scale = [min=0.5135, max=4.3875] +25-08-28 09:42:51 | D | - transformer_blocks.17.attn add_qkv_proj +25-08-28 09:42:51 | D | + w: sint4 +25-08-28 09:42:51 | D | + x: sint4 +25-08-28 09:42:51 | D | + y: None +25-08-28 09:42:51 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:42:51 | D | + finished parsing calibration arguments, ram usage: 18.6 +25-08-28 09:42:51 | D | + x - AbsMax +25-08-28 09:42:51 | D | + x = [min=0.2246, max=35.5000] +25-08-28 09:42:51 | D | + w - AbsMax +25-08-28 09:42:51 | D | + w = [min=0.1152, max=0.6797] +25-08-28 09:42:51 | D | + finished resetting calibrator, ram usage: 18.6 +25-08-28 09:42:52 | D | + finished calculating the original outputs, ram usage: 18.6 +25-08-28 09:44:00 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:44:00 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:44:00 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:44:00 | D | - sum error = [ 2594.6467, 2502.6830, 2438.7633, 2360.2694, 2280.3390] +25-08-28 09:44:00 | D | - best error = [ 2594.6467, 2502.6830, 2438.7633, 2360.2694, 2280.3390] +25-08-28 09:44:00 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:44:00 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:44:00 | D | - sum error = [ 2231.4057, 2166.5386, 2151.6541, 2092.4263, 2081.9712] +25-08-28 09:44:00 | D | - best error = [ 2231.4057, 2166.5386, 2151.6541, 2092.4263, 2081.9712] +25-08-28 09:44:00 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:44:00 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:44:00 | D | - sum error = [ 2034.5149, 2044.1828, 2040.6941, 2064.9897, 2075.9311] +25-08-28 09:44:00 | D | - best error = [ 2034.5149, 2034.5149, 2034.5149, 2034.5149, 2034.5149] +25-08-28 09:44:00 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:44:00 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:44:00 | D | - sum error = [ 2118.5534, 2181.3421, 2233.8766, 2291.3366, 2365.8954] +25-08-28 09:44:00 | D | - best error = [ 2034.5149, 2034.5149, 2034.5149, 2034.5149, 2034.5149] +25-08-28 09:44:00 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:44:00 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:44:00 | D | - sum error = [ 3802.9870, 3605.0190, 3386.5772, 3264.3064, 3156.4124] +25-08-28 09:44:00 | D | - best error = [ 2034.5149, 2034.5149, 2034.5149, 2034.5149, 2034.5149] +25-08-28 09:44:00 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:44:00 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:44:00 | D | - sum error = [ 2981.1037, 2848.4580, 2735.3644, 2659.4136, 2583.9973] +25-08-28 09:44:00 | D | - best error = [ 2034.5149, 2034.5149, 2034.5149, 2034.5149, 2034.5149] +25-08-28 09:44:00 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:44:00 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:44:00 | D | - sum error = [ 2524.5308, 2421.7571, 2357.2267, 2313.0752, 2327.6412] +25-08-28 09:44:00 | D | - best error = [ 2034.5149, 2034.5149, 2034.5149, 2034.5149, 2034.5149] +25-08-28 09:44:00 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:44:00 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:44:00 | D | - sum error = [ 2324.7143, 2329.8373, 2359.7033, 2418.9839] +25-08-28 09:44:00 | D | - best error = [ 2034.5149, 2034.5149, 2034.5149, 2034.5149] +25-08-28 09:44:00 | D | + error = 2034.5149 +25-08-28 09:44:00 | D | + scale = [min=0.4739, max=5.9582] +25-08-28 09:44:01 | D | - transformer_blocks.17.attn.out_proj + transformer_blocks.17.attn.add_out_proj +25-08-28 09:44:01 | D | + w: sint4 +25-08-28 09:44:01 | D | + x: sint4 +25-08-28 09:44:01 | D | + y: None +25-08-28 09:44:01 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:44:01 | D | + finished parsing calibration arguments, ram usage: 18.6 +25-08-28 09:44:01 | D | + x - AbsMax +25-08-28 09:44:01 | D | + x = [min=4.0938, max=20.7500] +25-08-28 09:44:01 | D | + w - AbsMax +25-08-28 09:44:01 | D | + w = [min=0.1270, max=0.3477] +25-08-28 09:44:01 | D | + finished resetting calibrator, ram usage: 18.6 +25-08-28 09:44:03 | D | + finished calculating the original outputs, ram usage: 18.6 +25-08-28 09:45:57 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:45:57 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:45:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:45:57 | D | - sum error = [12455.4091, 12409.0444, 12375.0601, 12360.7974, 12328.7261] +25-08-28 09:45:57 | D | - best error = [12455.4091, 12409.0444, 12375.0601, 12360.7974, 12328.7261] +25-08-28 09:45:57 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:45:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:45:57 | D | - sum error = [12308.8573, 12295.6844, 12305.2496, 12314.0940, 12340.3222] +25-08-28 09:45:57 | D | - best error = [12308.8573, 12295.6844, 12295.6844, 12295.6844, 12295.6844] +25-08-28 09:45:57 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:45:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:45:57 | D | - sum error = [12369.5057, 12402.0353, 12441.0951, 12509.7671, 12555.5890] +25-08-28 09:45:57 | D | - best error = [12295.6844, 12295.6844, 12295.6844, 12295.6844, 12295.6844] +25-08-28 09:45:57 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:45:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:45:57 | D | - sum error = [12586.4683, 12662.7713, 12750.7352, 12824.0667, 12898.8545] +25-08-28 09:45:57 | D | - best error = [12295.6844, 12295.6844, 12295.6844, 12295.6844, 12295.6844] +25-08-28 09:45:57 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:45:57 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:45:57 | D | - sum error = [12913.7192, 12840.2916, 12724.2207, 12667.2763, 12596.9101] +25-08-28 09:45:57 | D | - best error = [12295.6844, 12295.6844, 12295.6844, 12295.6844, 12295.6844] +25-08-28 09:45:57 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:45:57 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:45:57 | D | - sum error = [12536.1902, 12492.7665, 12462.9736, 12459.7252, 12456.1702] +25-08-28 09:45:57 | D | - best error = [12295.6844, 12295.6844, 12295.6844, 12295.6844, 12295.6844] +25-08-28 09:45:57 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:45:57 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:45:57 | D | - sum error = [12434.9662, 12468.9226, 12490.6307, 12521.7980, 12572.9220] +25-08-28 09:45:57 | D | - best error = [12295.6844, 12295.6844, 12295.6844, 12295.6844, 12295.6844] +25-08-28 09:45:57 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:45:57 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:45:57 | D | - sum error = [12617.7526, 12710.1767, 12798.3918, 12891.5098] +25-08-28 09:45:57 | D | - best error = [12295.6844, 12295.6844, 12295.6844, 12295.6844] +25-08-28 09:45:57 | D | + error = 12295.6844 +25-08-28 09:45:57 | D | + scale = [min=1.5263, max=2.4837] +25-08-28 09:45:58 | D | - transformer_blocks.17.ff.up_proj +25-08-28 09:45:58 | D | + w: sint4 +25-08-28 09:45:58 | D | + x: sint4 +25-08-28 09:45:58 | D | + y: None +25-08-28 09:45:58 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:45:58 | D | + finished parsing calibration arguments, ram usage: 18.5 +25-08-28 09:45:58 | D | + x - AbsMax +25-08-28 09:45:58 | D | + x = [min=0.1680, max=10.6875] +25-08-28 09:45:58 | D | + w - AbsMax +25-08-28 09:45:58 | D | + w = [min=0.0918, max=0.5234] +25-08-28 09:45:58 | D | + finished resetting calibrator, ram usage: 18.6 +25-08-28 09:45:59 | D | + finished calculating the original outputs, ram usage: 18.6 +25-08-28 09:47:21 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:47:21 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:47:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:47:21 | D | - sum error = [10376.7324, 10120.5122, 9891.5414, 9683.4626, 9488.1884] +25-08-28 09:47:21 | D | - best error = [10376.7324, 10120.5122, 9891.5414, 9683.4626, 9488.1884] +25-08-28 09:47:21 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:47:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:47:21 | D | - sum error = [ 9313.0566, 9164.7149, 9032.4535, 8923.9444, 8828.1767] +25-08-28 09:47:21 | D | - best error = [ 9313.0566, 9164.7149, 9032.4535, 8923.9444, 8828.1767] +25-08-28 09:47:21 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:47:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:47:21 | D | - sum error = [ 8751.0562, 8689.4534, 8648.4732, 8616.0042, 8605.0715] +25-08-28 09:47:21 | D | - best error = [ 8751.0562, 8689.4534, 8648.4732, 8616.0042, 8605.0715] +25-08-28 09:47:21 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:47:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:47:21 | D | - sum error = [ 8595.2689, 8607.3615, 8632.8368, 8667.0872, 8717.2179] +25-08-28 09:47:21 | D | - best error = [ 8595.2689, 8595.2689, 8595.2689, 8595.2689, 8595.2689] +25-08-28 09:47:21 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:47:21 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:47:21 | D | - sum error = [10492.4887, 10209.7773, 9953.3689, 9717.3618, 9509.8244] +25-08-28 09:47:21 | D | - best error = [ 8595.2689, 8595.2689, 8595.2689, 8595.2689, 8595.2689] +25-08-28 09:47:21 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:47:21 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:47:21 | D | - sum error = [ 9327.4425, 9165.8156, 9026.2320, 8909.7645, 8800.0486] +25-08-28 09:47:21 | D | - best error = [ 8595.2689, 8595.2689, 8595.2689, 8595.2689, 8595.2689] +25-08-28 09:47:21 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:47:21 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:47:21 | D | - sum error = [ 8720.1142, 8657.3238, 8617.4242, 8591.8850, 8582.5987] +25-08-28 09:47:21 | D | - best error = [ 8595.2689, 8595.2689, 8595.2689, 8591.8850, 8582.5987] +25-08-28 09:47:21 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:47:21 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:47:21 | D | - sum error = [ 8588.0186, 8609.0130, 8651.5839, 8705.8780] +25-08-28 09:47:21 | D | - best error = [ 8582.5987, 8582.5987, 8582.5987, 8582.5987] +25-08-28 09:47:21 | D | + error = 8582.5987 +25-08-28 09:47:21 | D | + scale = [min=0.4354, max=8.8694] +25-08-28 09:47:21 | D | - transformer_blocks.17.ff.down_proj +25-08-28 09:47:21 | D | + w: sint4 +25-08-28 09:47:21 | D | + x: uint4 +25-08-28 09:47:21 | D | + y: None +25-08-28 09:47:21 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:47:21 | D | + finished parsing calibration arguments, ram usage: 18.6 +25-08-28 09:47:24 | D | + x - AbsMax +25-08-28 09:47:24 | D | + x = [min=0.1719, max=69.0000] +25-08-28 09:47:24 | D | + w - AbsMax +25-08-28 09:47:24 | D | + w = [min=0.1113, max=1.1562] +25-08-28 09:47:24 | D | + finished resetting calibrator, ram usage: 18.6 +25-08-28 09:47:26 | D | + finished calculating the original outputs, ram usage: 18.6 +25-08-28 09:49:22 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:49:22 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:49:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:49:22 | D | - sum error = [ 8265.0551, 8264.7444, 8293.4524, 8287.6404, 8341.7744] +25-08-28 09:49:22 | D | - best error = [ 8265.0551, 8264.7444, 8264.7444, 8264.7444, 8264.7444] +25-08-28 09:49:22 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:49:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:49:22 | D | - sum error = [ 8366.9058, 8401.3270, 8449.9524, 8504.2371, 8588.0107] +25-08-28 09:49:22 | D | - best error = [ 8264.7444, 8264.7444, 8264.7444, 8264.7444, 8264.7444] +25-08-28 09:49:22 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:49:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:49:22 | D | - sum error = [ 8733.0416, 8842.9574, 8973.8754, 9124.0530, 9242.9497] +25-08-28 09:49:22 | D | - best error = [ 8264.7444, 8264.7444, 8264.7444, 8264.7444, 8264.7444] +25-08-28 09:49:22 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:49:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:49:22 | D | - sum error = [ 9396.4380, 9550.4325, 9735.5485, 9933.7469, 10161.2464] +25-08-28 09:49:22 | D | - best error = [ 8264.7444, 8264.7444, 8264.7444, 8264.7444, 8264.7444] +25-08-28 09:49:22 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:49:22 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:49:22 | D | - sum error = [ 9442.8890, 9178.6334, 8978.7081, 8794.0161, 8677.3517] +25-08-28 09:49:22 | D | - best error = [ 8264.7444, 8264.7444, 8264.7444, 8264.7444, 8264.7444] +25-08-28 09:49:22 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:49:22 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:49:22 | D | - sum error = [ 8618.4142, 8529.6912, 8519.8291, 8535.6785, 8527.3546] +25-08-28 09:49:22 | D | - best error = [ 8264.7444, 8264.7444, 8264.7444, 8264.7444, 8264.7444] +25-08-28 09:49:22 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:49:22 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:49:22 | D | - sum error = [ 8596.5649, 8666.5513, 8776.2539, 8937.1521, 9081.7117] +25-08-28 09:49:22 | D | - best error = [ 8264.7444, 8264.7444, 8264.7444, 8264.7444, 8264.7444] +25-08-28 09:49:22 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:49:22 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:49:22 | D | - sum error = [ 9298.9539, 9504.0008, 9777.0891, 10050.9064] +25-08-28 09:49:22 | D | - best error = [ 8264.7444, 8264.7444, 8264.7444, 8264.7444] +25-08-28 09:49:22 | D | + error = 8264.7444 +25-08-28 09:49:22 | D | + scale = [min=0.9157, max=1.2358] +25-08-28 09:49:22 | D | - transformer_blocks.17.ff_context.up_proj +25-08-28 09:49:22 | D | + w: sint4 +25-08-28 09:49:22 | D | + x: sint4 +25-08-28 09:49:22 | D | + y: None +25-08-28 09:49:22 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:49:22 | D | + finished parsing calibration arguments, ram usage: 18.5 +25-08-28 09:49:22 | D | + x - AbsMax +25-08-28 09:49:22 | D | + x = [min=0.2363, max=44.7500] +25-08-28 09:49:22 | D | + w - AbsMax +25-08-28 09:49:22 | D | + w = [min=0.1074, max=0.6328] +25-08-28 09:49:22 | D | + finished resetting calibrator, ram usage: 18.6 +25-08-28 09:49:23 | D | + finished calculating the original outputs, ram usage: 18.6 +25-08-28 09:50:07 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:50:07 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:50:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:50:07 | D | - sum error = [10111.9640, 9688.2593, 9307.2668, 8978.0657, 8616.9164] +25-08-28 09:50:07 | D | - best error = [10111.9640, 9688.2593, 9307.2668, 8978.0657, 8616.9164] +25-08-28 09:50:07 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:50:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:50:07 | D | - sum error = [ 8273.7754, 7958.3038, 7626.5767, 7324.6280, 7084.8684] +25-08-28 09:50:07 | D | - best error = [ 8273.7754, 7958.3038, 7626.5767, 7324.6280, 7084.8684] +25-08-28 09:50:07 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:50:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:50:07 | D | - sum error = [ 6941.1887, 6824.4868, 6757.7718, 6741.2379, 6709.2175] +25-08-28 09:50:07 | D | - best error = [ 6941.1887, 6824.4868, 6757.7718, 6741.2379, 6709.2175] +25-08-28 09:50:07 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:50:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:50:07 | D | - sum error = [ 6700.5803, 6730.5076, 6808.3361, 6930.0724, 7035.2808] +25-08-28 09:50:07 | D | - best error = [ 6700.5803, 6700.5803, 6700.5803, 6700.5803, 6700.5803] +25-08-28 09:50:07 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:50:07 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:50:07 | D | - sum error = [ 9207.3466, 8823.6319, 8411.1692, 8005.3402, 7699.8519] +25-08-28 09:50:07 | D | - best error = [ 6700.5803, 6700.5803, 6700.5803, 6700.5803, 6700.5803] +25-08-28 09:50:07 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:50:07 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:50:07 | D | - sum error = [ 7367.0447, 7153.6939, 6952.8711, 6777.8050, 6674.7422] +25-08-28 09:50:07 | D | - best error = [ 6700.5803, 6700.5803, 6700.5803, 6700.5803, 6674.7422] +25-08-28 09:50:07 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:50:07 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:50:07 | D | - sum error = [ 6596.5512, 6563.5103, 6568.9122, 6596.6877, 6569.6423] +25-08-28 09:50:07 | D | - best error = [ 6596.5512, 6563.5103, 6563.5103, 6563.5103, 6563.5103] +25-08-28 09:50:07 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:50:07 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:50:07 | D | - sum error = [ 6640.7789, 6704.1074, 6852.1748, 6992.1356] +25-08-28 09:50:07 | D | - best error = [ 6563.5103, 6563.5103, 6563.5103, 6563.5103] +25-08-28 09:50:07 | D | + error = 6563.5103 +25-08-28 09:50:07 | D | + scale = [min=0.7771, max=21.3820] +25-08-28 09:50:07 | D | - transformer_blocks.17.ff_context.down_proj +25-08-28 09:50:07 | D | + w: sint4 +25-08-28 09:50:07 | D | + x: uint4 +25-08-28 09:50:07 | D | + y: None +25-08-28 09:50:07 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:50:07 | D | + finished parsing calibration arguments, ram usage: 18.6 +25-08-28 09:50:07 | D | + x - AbsMax +25-08-28 09:50:07 | D | + x = [min=0.1719, max=69.5000] +25-08-28 09:50:07 | D | + w - AbsMax +25-08-28 09:50:07 | D | + w = [min=0.0391, max=0.7500] +25-08-28 09:50:07 | D | + finished resetting calibrator, ram usage: 18.6 +25-08-28 09:50:08 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 09:50:55 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:50:55 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:50:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:50:55 | D | - sum error = [ 3934.4094, 3738.0736, 3596.7708, 3540.4531, 3506.6531] +25-08-28 09:50:55 | D | - best error = [ 3934.4094, 3738.0736, 3596.7708, 3540.4531, 3506.6531] +25-08-28 09:50:55 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:50:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:50:55 | D | - sum error = [ 3574.3893, 3659.6068, 3872.5448, 4162.9559, 4587.1926] +25-08-28 09:50:55 | D | - best error = [ 3506.6531, 3506.6531, 3506.6531, 3506.6531, 3506.6531] +25-08-28 09:50:55 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:50:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:50:55 | D | - sum error = [ 5051.1620, 5884.2131, 6920.0798, 8020.5466, 8932.6171] +25-08-28 09:50:55 | D | - best error = [ 3506.6531, 3506.6531, 3506.6531, 3506.6531, 3506.6531] +25-08-28 09:50:55 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:50:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:50:55 | D | - sum error = [ 9648.1505, 10777.4594, 12863.1897, 16616.8425, 20806.1460] +25-08-28 09:50:55 | D | - best error = [ 3506.6531, 3506.6531, 3506.6531, 3506.6531, 3506.6531] +25-08-28 09:50:55 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:50:55 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:50:55 | D | - sum error = [ 4057.2625, 3892.8253, 3773.6062, 3726.7102, 3719.5427] +25-08-28 09:50:55 | D | - best error = [ 3506.6531, 3506.6531, 3506.6531, 3506.6531, 3506.6531] +25-08-28 09:50:55 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:50:55 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:50:55 | D | - sum error = [ 3828.4315, 3976.8447, 4232.2975, 4680.3115, 5259.6424] +25-08-28 09:50:55 | D | - best error = [ 3506.6531, 3506.6531, 3506.6531, 3506.6531, 3506.6531] +25-08-28 09:50:55 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:50:55 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:50:55 | D | - sum error = [ 6105.7419, 7150.1519, 8238.2843, 9012.4913, 9930.0743] +25-08-28 09:50:55 | D | - best error = [ 3506.6531, 3506.6531, 3506.6531, 3506.6531, 3506.6531] +25-08-28 09:50:55 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:50:55 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:50:55 | D | - sum error = [10968.4033, 13513.0278, 17279.1611, 21079.3647] +25-08-28 09:50:55 | D | - best error = [ 3506.6531, 3506.6531, 3506.6531, 3506.6531] +25-08-28 09:50:55 | D | + error = 3506.6531 +25-08-28 09:50:55 | D | + scale = [min=0.7031, max=2.3356] +25-08-28 09:51:14 | D | - Smoothing Diffusion Block transformer_blocks.18 +25-08-28 09:51:14 | D | - Skipping Module transformer_blocks.18.norm1.linear +25-08-28 09:51:14 | D | - Skipping Module transformer_blocks.18.norm1_context.linear +25-08-28 09:51:14 | D | - Smoothing Transformer Block transformer_blocks.18 +25-08-28 09:51:14 | D | - transformer_blocks.18.attn.qkv_proj +25-08-28 09:51:14 | D | + w: sint4 +25-08-28 09:51:14 | D | + x: sint4 +25-08-28 09:51:14 | D | + y: None +25-08-28 09:51:14 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:51:14 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 09:51:14 | D | + x - AbsMax +25-08-28 09:51:14 | D | + x = [min=0.1797, max=16.1250] +25-08-28 09:51:14 | D | + w - AbsMax +25-08-28 09:51:14 | D | + w = [min=0.1094, max=0.9492] +25-08-28 09:51:14 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 09:51:15 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 09:52:28 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:52:28 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:52:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:52:28 | D | - sum error = [11361.5590, 11126.6340, 10918.9619, 10707.0373, 10509.9039] +25-08-28 09:52:28 | D | - best error = [11361.5590, 11126.6340, 10918.9619, 10707.0373, 10509.9039] +25-08-28 09:52:28 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:52:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:52:28 | D | - sum error = [10349.4541, 10229.7539, 10198.4089, 10230.7114, 9981.7730] +25-08-28 09:52:28 | D | - best error = [10349.4541, 10229.7539, 10198.4089, 10198.4089, 9981.7730] +25-08-28 09:52:28 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:52:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:52:28 | D | - sum error = [ 9976.0417, 10063.7528, 9980.1271, 10018.6298, 10049.9930] +25-08-28 09:52:28 | D | - best error = [ 9976.0417, 9976.0417, 9976.0417, 9976.0417, 9976.0417] +25-08-28 09:52:28 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:52:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:52:28 | D | - sum error = [ 9991.4383, 10107.7519, 10121.6884, 10223.5861, 10274.5386] +25-08-28 09:52:28 | D | - best error = [ 9976.0417, 9976.0417, 9976.0417, 9976.0417, 9976.0417] +25-08-28 09:52:28 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:52:28 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:52:28 | D | - sum error = [32498.9245, 29044.9935, 25847.1383, 22859.6238, 20552.3784] +25-08-28 09:52:28 | D | - best error = [ 9976.0417, 9976.0417, 9976.0417, 9976.0417, 9976.0417] +25-08-28 09:52:28 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:52:28 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:52:28 | D | - sum error = [18593.9586, 16891.9171, 15424.7598, 14530.9810, 13586.1647] +25-08-28 09:52:28 | D | - best error = [ 9976.0417, 9976.0417, 9976.0417, 9976.0417, 9976.0417] +25-08-28 09:52:28 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:52:28 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:52:28 | D | - sum error = [12573.2134, 12022.6054, 11684.0349, 11201.5370, 10918.5178] +25-08-28 09:52:28 | D | - best error = [ 9976.0417, 9976.0417, 9976.0417, 9976.0417, 9976.0417] +25-08-28 09:52:28 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:52:28 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:52:28 | D | - sum error = [10651.6958, 10531.7744, 10338.4123, 10289.2379] +25-08-28 09:52:28 | D | - best error = [ 9976.0417, 9976.0417, 9976.0417, 9976.0417] +25-08-28 09:52:28 | D | + error = 9976.0417 +25-08-28 09:52:28 | D | + scale = [min=0.4239, max=4.0156] +25-08-28 09:52:28 | D | - transformer_blocks.18.attn add_qkv_proj +25-08-28 09:52:28 | D | + w: sint4 +25-08-28 09:52:28 | D | + x: sint4 +25-08-28 09:52:28 | D | + y: None +25-08-28 09:52:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:52:28 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 09:52:28 | D | + x - AbsMax +25-08-28 09:52:28 | D | + x = [min=0.2109, max=26.5000] +25-08-28 09:52:28 | D | + w - AbsMax +25-08-28 09:52:28 | D | + w = [min=0.1177, max=0.4453] +25-08-28 09:52:28 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 09:52:29 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 09:53:33 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:53:33 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:53:33 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:53:33 | D | - sum error = [ 3297.8678, 3268.7095, 3176.7013, 3086.5876, 2983.8368] +25-08-28 09:53:33 | D | - best error = [ 3297.8678, 3268.7095, 3176.7013, 3086.5876, 2983.8368] +25-08-28 09:53:33 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:53:33 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:53:33 | D | - sum error = [ 2947.7919, 2913.0256, 2901.5944, 2801.7360, 2758.7465] +25-08-28 09:53:33 | D | - best error = [ 2947.7919, 2913.0256, 2901.5944, 2801.7360, 2758.7465] +25-08-28 09:53:33 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:53:33 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:53:33 | D | - sum error = [ 2765.3314, 2724.2682, 2774.5195, 2731.7791, 2788.2312] +25-08-28 09:53:33 | D | - best error = [ 2758.7465, 2724.2682, 2724.2682, 2724.2682, 2724.2682] +25-08-28 09:53:33 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:53:33 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:53:33 | D | - sum error = [ 2729.1425, 2767.4910, 2807.2410, 2889.2743, 2994.9477] +25-08-28 09:53:33 | D | - best error = [ 2724.2682, 2724.2682, 2724.2682, 2724.2682, 2724.2682] +25-08-28 09:53:33 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:53:33 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:53:33 | D | - sum error = [ 4230.0426, 4177.5756, 4072.2559, 3988.3894, 3814.2349] +25-08-28 09:53:33 | D | - best error = [ 2724.2682, 2724.2682, 2724.2682, 2724.2682, 2724.2682] +25-08-28 09:53:33 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:53:33 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:53:33 | D | - sum error = [ 3619.0467, 3476.9974, 3383.7680, 3251.8115, 3100.1108] +25-08-28 09:53:33 | D | - best error = [ 2724.2682, 2724.2682, 2724.2682, 2724.2682, 2724.2682] +25-08-28 09:53:33 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:53:33 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:53:33 | D | - sum error = [ 3029.0990, 2955.5402, 2873.0165, 2919.7705, 2923.3997] +25-08-28 09:53:33 | D | - best error = [ 2724.2682, 2724.2682, 2724.2682, 2724.2682, 2724.2682] +25-08-28 09:53:33 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:53:33 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:53:33 | D | - sum error = [ 2885.1791, 2885.7471, 2928.6058, 2996.2977] +25-08-28 09:53:33 | D | - best error = [ 2724.2682, 2724.2682, 2724.2682, 2724.2682] +25-08-28 09:53:33 | D | + error = 2724.2682 +25-08-28 09:53:33 | D | + scale = [min=0.4249, max=6.0644] +25-08-28 09:53:33 | D | - transformer_blocks.18.attn.out_proj + transformer_blocks.18.attn.add_out_proj +25-08-28 09:53:33 | D | + w: sint4 +25-08-28 09:53:33 | D | + x: sint4 +25-08-28 09:53:33 | D | + y: None +25-08-28 09:53:33 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:53:33 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 09:53:34 | D | + x - AbsMax +25-08-28 09:53:34 | D | + x = [min=4.1562, max=25.7500] +25-08-28 09:53:34 | D | + w - AbsMax +25-08-28 09:53:34 | D | + w = [min=0.1260, max=0.4414] +25-08-28 09:53:34 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 09:53:35 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 09:55:20 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:55:20 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:55:20 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:55:20 | D | - sum error = [17527.6066, 17424.2608, 17333.6260, 17271.7693, 17232.7428] +25-08-28 09:55:20 | D | - best error = [17527.6066, 17424.2608, 17333.6260, 17271.7693, 17232.7428] +25-08-28 09:55:20 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:55:20 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:55:20 | D | - sum error = [17203.4536, 17172.2902, 17173.7865, 17177.8732, 17212.0655] +25-08-28 09:55:20 | D | - best error = [17203.4536, 17172.2902, 17172.2902, 17172.2902, 17172.2902] +25-08-28 09:55:20 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:55:20 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:55:20 | D | - sum error = [17245.0981, 17267.0526, 17340.0964, 17413.8749, 17519.7881] +25-08-28 09:55:20 | D | - best error = [17172.2902, 17172.2902, 17172.2902, 17172.2902, 17172.2902] +25-08-28 09:55:20 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:55:20 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:55:20 | D | - sum error = [17589.7798, 17661.0441, 17827.1146, 17951.3235, 18117.6316] +25-08-28 09:55:20 | D | - best error = [17172.2902, 17172.2902, 17172.2902, 17172.2902, 17172.2902] +25-08-28 09:55:20 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:55:20 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:55:20 | D | - sum error = [18557.3360, 18371.9430, 18193.2058, 18044.7320, 17920.2437] +25-08-28 09:55:20 | D | - best error = [17172.2902, 17172.2902, 17172.2902, 17172.2902, 17172.2902] +25-08-28 09:55:20 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:55:20 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:55:20 | D | - sum error = [17801.8138, 17690.5453, 17625.7012, 17581.0908, 17554.5677] +25-08-28 09:55:20 | D | - best error = [17172.2902, 17172.2902, 17172.2902, 17172.2902, 17172.2902] +25-08-28 09:55:20 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:55:20 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:55:20 | D | - sum error = [17561.7001, 17504.0941, 17538.9395, 17609.8881, 17670.7523] +25-08-28 09:55:20 | D | - best error = [17172.2902, 17172.2902, 17172.2902, 17172.2902, 17172.2902] +25-08-28 09:55:20 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:55:20 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:55:20 | D | - sum error = [17769.1868, 17859.2443, 17984.7651, 18114.9627] +25-08-28 09:55:20 | D | - best error = [17172.2902, 17172.2902, 17172.2902, 17172.2902] +25-08-28 09:55:20 | D | + error = 17172.2902 +25-08-28 09:55:20 | D | + scale = [min=1.5332, max=2.6499] +25-08-28 09:55:21 | D | - transformer_blocks.18.ff.up_proj +25-08-28 09:55:21 | D | + w: sint4 +25-08-28 09:55:21 | D | + x: sint4 +25-08-28 09:55:21 | D | + y: None +25-08-28 09:55:21 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:55:21 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 09:55:21 | D | + x - AbsMax +25-08-28 09:55:21 | D | + x = [min=0.2656, max=11.8125] +25-08-28 09:55:21 | D | + w - AbsMax +25-08-28 09:55:21 | D | + w = [min=0.1099, max=0.6992] +25-08-28 09:55:21 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 09:55:22 | D | + finished calculating the original outputs, ram usage: 15.9 +25-08-28 09:56:40 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:56:40 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:56:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:56:40 | D | - sum error = [10552.4947, 10255.3432, 9980.8268, 9743.8978, 9533.2416] +25-08-28 09:56:40 | D | - best error = [10552.4947, 10255.3432, 9980.8268, 9743.8978, 9533.2416] +25-08-28 09:56:40 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:56:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:56:40 | D | - sum error = [ 9350.3069, 9199.8457, 9069.9504, 8962.6443, 8880.4982] +25-08-28 09:56:40 | D | - best error = [ 9350.3069, 9199.8457, 9069.9504, 8962.6443, 8880.4982] +25-08-28 09:56:40 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:56:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:56:40 | D | - sum error = [ 8807.4732, 8761.9715, 8723.4722, 8704.5643, 8707.8911] +25-08-28 09:56:40 | D | - best error = [ 8807.4732, 8761.9715, 8723.4722, 8704.5643, 8704.5643] +25-08-28 09:56:40 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:56:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:56:40 | D | - sum error = [ 8724.4287, 8758.2966, 8795.9836, 8851.9426, 8924.6643] +25-08-28 09:56:40 | D | - best error = [ 8704.5643, 8704.5643, 8704.5643, 8704.5643, 8704.5643] +25-08-28 09:56:40 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:56:40 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:56:40 | D | - sum error = [14276.6383, 13523.9905, 12838.2173, 12208.1415, 11656.3974] +25-08-28 09:56:40 | D | - best error = [ 8704.5643, 8704.5643, 8704.5643, 8704.5643, 8704.5643] +25-08-28 09:56:40 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:56:40 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:56:40 | D | - sum error = [11160.2706, 10715.4849, 10326.6366, 9990.6765, 9701.8258] +25-08-28 09:56:40 | D | - best error = [ 8704.5643, 8704.5643, 8704.5643, 8704.5643, 8704.5643] +25-08-28 09:56:40 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:56:40 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:56:40 | D | - sum error = [ 9470.0758, 9269.7771, 9107.8933, 8981.0543, 8889.0530] +25-08-28 09:56:40 | D | - best error = [ 8704.5643, 8704.5643, 8704.5643, 8704.5643, 8704.5643] +25-08-28 09:56:40 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:56:40 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:56:40 | D | - sum error = [ 8847.9815, 8837.6916, 8863.9440, 8912.8536] +25-08-28 09:56:40 | D | - best error = [ 8704.5643, 8704.5643, 8704.5643, 8704.5643] +25-08-28 09:56:40 | D | + error = 8704.5643 +25-08-28 09:56:40 | D | + scale = [min=0.4224, max=4.9776] +25-08-28 09:56:41 | D | - transformer_blocks.18.ff.down_proj +25-08-28 09:56:41 | D | + w: sint4 +25-08-28 09:56:41 | D | + x: uint4 +25-08-28 09:56:41 | D | + y: None +25-08-28 09:56:41 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:56:41 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 09:56:43 | D | + x - AbsMax +25-08-28 09:56:43 | D | + x = [min=1.2891, max=201.0000] +25-08-28 09:56:43 | D | + w - AbsMax +25-08-28 09:56:43 | D | + w = [min=0.1113, max=1.4609] +25-08-28 09:56:43 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 09:56:45 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 09:58:37 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:58:37 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:58:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:58:37 | D | - sum error = [10225.0974, 10076.6225, 9867.0336, 9815.8125, 9717.0572] +25-08-28 09:58:37 | D | - best error = [10225.0974, 10076.6225, 9867.0336, 9815.8125, 9717.0572] +25-08-28 09:58:37 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:58:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:58:37 | D | - sum error = [ 9721.8945, 9722.0047, 9694.9067, 9761.6158, 9759.5582] +25-08-28 09:58:37 | D | - best error = [ 9717.0572, 9717.0572, 9694.9067, 9694.9067, 9694.9067] +25-08-28 09:58:37 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:58:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:58:37 | D | - sum error = [ 9914.8039, 10003.0843, 10114.6500, 10232.0180, 10347.3498] +25-08-28 09:58:37 | D | - best error = [ 9694.9067, 9694.9067, 9694.9067, 9694.9067, 9694.9067] +25-08-28 09:58:37 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:58:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:58:37 | D | - sum error = [10536.7001, 10684.1876, 10893.9569, 11067.7954, 11361.3316] +25-08-28 09:58:37 | D | - best error = [ 9694.9067, 9694.9067, 9694.9067, 9694.9067, 9694.9067] +25-08-28 09:58:37 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:58:37 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:58:37 | D | - sum error = [20925.1007, 17486.1362, 14899.3729, 13090.6318, 11857.6773] +25-08-28 09:58:37 | D | - best error = [ 9694.9067, 9694.9067, 9694.9067, 9694.9067, 9694.9067] +25-08-28 09:58:37 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:58:37 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:58:37 | D | - sum error = [11107.2146, 10549.4242, 10188.9042, 10007.6519, 9857.7182] +25-08-28 09:58:37 | D | - best error = [ 9694.9067, 9694.9067, 9694.9067, 9694.9067, 9694.9067] +25-08-28 09:58:37 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:58:37 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:58:37 | D | - sum error = [ 9836.7417, 9760.7287, 9852.3191, 9972.4035, 10110.4746] +25-08-28 09:58:37 | D | - best error = [ 9694.9067, 9694.9067, 9694.9067, 9694.9067, 9694.9067] +25-08-28 09:58:37 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:58:37 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:58:37 | D | - sum error = [10354.0724, 10626.4948, 10801.5973, 11186.6134] +25-08-28 09:58:37 | D | - best error = [ 9694.9067, 9694.9067, 9694.9067, 9694.9067] +25-08-28 09:58:37 | D | + error = 9694.9067 +25-08-28 09:58:37 | D | + scale = [min=1.0929, max=6.3991] +25-08-28 09:58:38 | D | - transformer_blocks.18.ff_context.up_proj +25-08-28 09:58:38 | D | + w: sint4 +25-08-28 09:58:38 | D | + x: sint4 +25-08-28 09:58:38 | D | + y: None +25-08-28 09:58:38 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:58:38 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 09:58:38 | D | + x - AbsMax +25-08-28 09:58:38 | D | + x = [min=0.4258, max=122.0000] +25-08-28 09:58:38 | D | + w - AbsMax +25-08-28 09:58:38 | D | + w = [min=0.1406, max=0.9375] +25-08-28 09:58:38 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 09:58:38 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 09:59:22 | D | - x / w range = AbsMax / AbsMax +25-08-28 09:59:22 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 09:59:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:59:22 | D | - sum error = [19858.2207, 18408.4441, 17109.6165, 15970.7322, 15010.8260] +25-08-28 09:59:22 | D | - best error = [19858.2207, 18408.4441, 17109.6165, 15970.7322, 15010.8260] +25-08-28 09:59:22 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 09:59:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:59:22 | D | - sum error = [13939.4209, 13039.2307, 12247.3083, 11470.8772, 10792.1486] +25-08-28 09:59:22 | D | - best error = [13939.4209, 13039.2307, 12247.3083, 11470.8772, 10792.1486] +25-08-28 09:59:22 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 09:59:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:59:22 | D | - sum error = [10235.5381, 9694.8225, 9288.2726, 8951.1680, 8701.7819] +25-08-28 09:59:22 | D | - best error = [10235.5381, 9694.8225, 9288.2726, 8951.1680, 8701.7819] +25-08-28 09:59:22 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:59:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 09:59:22 | D | - sum error = [ 8516.9239, 8481.7022, 8491.7442, 8533.4814, 8693.3943] +25-08-28 09:59:22 | D | - best error = [ 8516.9239, 8481.7022, 8481.7022, 8481.7022, 8481.7022] +25-08-28 09:59:22 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 09:59:22 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 09:59:22 | D | - sum error = [23224.9466, 21160.5285, 19446.0937, 17875.4750, 16488.2610] +25-08-28 09:59:22 | D | - best error = [ 8481.7022, 8481.7022, 8481.7022, 8481.7022, 8481.7022] +25-08-28 09:59:22 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 09:59:22 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 09:59:22 | D | - sum error = [15163.4856, 14050.1610, 13021.1281, 12092.2706, 11317.0622] +25-08-28 09:59:22 | D | - best error = [ 8481.7022, 8481.7022, 8481.7022, 8481.7022, 8481.7022] +25-08-28 09:59:22 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 09:59:22 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 09:59:22 | D | - sum error = [10660.8480, 10078.9611, 9539.5815, 9089.9883, 8793.5586] +25-08-28 09:59:22 | D | - best error = [ 8481.7022, 8481.7022, 8481.7022, 8481.7022, 8481.7022] +25-08-28 09:59:22 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 09:59:22 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 09:59:22 | D | - sum error = [ 8575.1478, 8526.5135, 8542.5490, 8669.0373] +25-08-28 09:59:22 | D | - best error = [ 8481.7022, 8481.7022, 8481.7022, 8481.7022] +25-08-28 09:59:22 | D | + error = 8481.7022 +25-08-28 09:59:22 | D | + scale = [min=0.5051, max=46.6754] +25-08-28 09:59:23 | D | - transformer_blocks.18.ff_context.down_proj +25-08-28 09:59:23 | D | + w: sint4 +25-08-28 09:59:23 | D | + x: uint4 +25-08-28 09:59:23 | D | + y: None +25-08-28 09:59:23 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 09:59:23 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 09:59:23 | D | + x - AbsMax +25-08-28 09:59:23 | D | + x = [min=0.1719, max=217.0000] +25-08-28 09:59:23 | D | + w - AbsMax +25-08-28 09:59:23 | D | + w = [min=0.0255, max=0.8125] +25-08-28 09:59:23 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 09:59:24 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:00:12 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:00:12 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:00:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:00:12 | D | - sum error = [11458.7476, 10757.5611, 10061.9775, 9611.4457, 9212.9474] +25-08-28 10:00:12 | D | - best error = [11458.7476, 10757.5611, 10061.9775, 9611.4457, 9212.9474] +25-08-28 10:00:12 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:00:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:00:12 | D | - sum error = [ 8908.7337, 8717.9355, 8766.8416, 8862.2900, 9332.4012] +25-08-28 10:00:12 | D | - best error = [ 8908.7337, 8717.9355, 8717.9355, 8717.9355, 8717.9355] +25-08-28 10:00:12 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:00:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:00:12 | D | - sum error = [10079.7425, 10960.8350, 11542.0518, 12181.7417, 12482.0005] +25-08-28 10:00:12 | D | - best error = [ 8717.9355, 8717.9355, 8717.9355, 8717.9355, 8717.9355] +25-08-28 10:00:12 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:00:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:00:12 | D | - sum error = [13230.9075, 14379.1605, 16917.4126, 20127.8331, 24090.8945] +25-08-28 10:00:12 | D | - best error = [ 8717.9355, 8717.9355, 8717.9355, 8717.9355, 8717.9355] +25-08-28 10:00:12 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:00:12 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:00:12 | D | - sum error = [45230.6077, 31279.6604, 22552.8741, 17844.1520, 15222.7614] +25-08-28 10:00:12 | D | - best error = [ 8717.9355, 8717.9355, 8717.9355, 8717.9355, 8717.9355] +25-08-28 10:00:12 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:00:12 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:00:12 | D | - sum error = [13387.1927, 12314.1272, 11365.9699, 11070.4515, 11083.1790] +25-08-28 10:00:12 | D | - best error = [ 8717.9355, 8717.9355, 8717.9355, 8717.9355, 8717.9355] +25-08-28 10:00:12 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:00:12 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:00:12 | D | - sum error = [11617.1232, 12197.9021, 12556.3638, 12986.7280, 13514.8765] +25-08-28 10:00:12 | D | - best error = [ 8717.9355, 8717.9355, 8717.9355, 8717.9355, 8717.9355] +25-08-28 10:00:12 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:00:12 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:00:12 | D | - sum error = [14720.9967, 17098.3424, 20377.5249, 24224.5588] +25-08-28 10:00:12 | D | - best error = [ 8717.9355, 8717.9355, 8717.9355, 8717.9355] +25-08-28 10:00:12 | D | + error = 8717.9355 +25-08-28 10:00:12 | D | + scale = [min=0.5896, max=5.0227] +25-08-28 10:00:31 | D | - Smoothing Diffusion Block single_transformer_blocks.0 +25-08-28 10:00:31 | D | - Skipping Module single_transformer_blocks.0.norm.linear +25-08-28 10:00:31 | D | - Smoothing Transformer Block single_transformer_blocks.0 +25-08-28 10:00:31 | D | - single_transformer_blocks.0.attn.qkv_proj + single_transformer_blocks.0.up_proj +25-08-28 10:00:31 | D | + w: sint4 +25-08-28 10:00:31 | D | + x: sint4 +25-08-28 10:00:31 | D | + y: None +25-08-28 10:00:31 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:00:31 | D | + finished parsing calibration arguments, ram usage: 18.6 +25-08-28 10:00:32 | D | + x - AbsMax +25-08-28 10:00:32 | D | + x = [min=0.2285, max=46.0000] +25-08-28 10:00:32 | D | + w - AbsMax +25-08-28 10:00:32 | D | + w = [min=0.1152, max=0.5391] +25-08-28 10:00:32 | D | + finished resetting calibrator, ram usage: 18.5 +25-08-28 10:00:33 | D | + finished calculating the original outputs, ram usage: 18.5 +25-08-28 10:02:40 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:02:40 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:02:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:02:40 | D | - sum error = [19927.0694, 19320.7660, 18681.0389, 18173.6926, 17936.6949] +25-08-28 10:02:40 | D | - best error = [19927.0694, 19320.7660, 18681.0389, 18173.6926, 17936.6949] +25-08-28 10:02:40 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:02:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:02:40 | D | - sum error = [17582.8526, 17266.7208, 17101.6238, 16932.4248, 16854.3559] +25-08-28 10:02:40 | D | - best error = [17582.8526, 17266.7208, 17101.6238, 16932.4248, 16854.3559] +25-08-28 10:02:40 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:02:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:02:40 | D | - sum error = [16876.2868, 16846.0892, 16918.9144, 17083.0667, 17236.3829] +25-08-28 10:02:40 | D | - best error = [16854.3559, 16846.0892, 16846.0892, 16846.0892, 16846.0892] +25-08-28 10:02:40 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:02:40 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:02:40 | D | - sum error = [17401.9162, 17760.5027, 17972.5444, 18634.8396, 18762.7904] +25-08-28 10:02:40 | D | - best error = [16846.0892, 16846.0892, 16846.0892, 16846.0892, 16846.0892] +25-08-28 10:02:40 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:02:40 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:02:40 | D | - sum error = [25359.8221, 24083.5732, 23503.4649, 22433.0128, 21547.2467] +25-08-28 10:02:40 | D | - best error = [16846.0892, 16846.0892, 16846.0892, 16846.0892, 16846.0892] +25-08-28 10:02:40 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:02:40 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:02:40 | D | - sum error = [20745.0821, 20319.4866, 19539.7720, 19138.6094, 18701.8870] +25-08-28 10:02:40 | D | - best error = [16846.0892, 16846.0892, 16846.0892, 16846.0892, 16846.0892] +25-08-28 10:02:40 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:02:40 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:02:40 | D | - sum error = [18540.7435, 18300.9814, 18274.6811, 18074.9843, 18116.7202] +25-08-28 10:02:40 | D | - best error = [16846.0892, 16846.0892, 16846.0892, 16846.0892, 16846.0892] +25-08-28 10:02:40 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:02:40 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:02:40 | D | - sum error = [18317.4819, 18445.9973, 18948.9318, 18831.1019] +25-08-28 10:02:40 | D | - best error = [16846.0892, 16846.0892, 16846.0892, 16846.0892] +25-08-28 10:02:40 | D | + error = 16846.0892 +25-08-28 10:02:40 | D | + scale = [min=0.4440, max=8.2133] +25-08-28 10:02:40 | D | - single_transformer_blocks.0.attn.out_proj +25-08-28 10:02:40 | D | + w: sint4 +25-08-28 10:02:40 | D | + x: sint4 +25-08-28 10:02:40 | D | + y: None +25-08-28 10:02:40 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:02:40 | D | + finished parsing calibration arguments, ram usage: 18.5 +25-08-28 10:02:41 | D | + x - AbsMax +25-08-28 10:02:41 | D | + x = [min=1.6484, max=14.4375] +25-08-28 10:02:41 | D | + w - AbsMax +25-08-28 10:02:41 | D | + w = [min=0.1201, max=0.3164] +25-08-28 10:02:41 | D | + finished resetting calibrator, ram usage: 18.6 +25-08-28 10:02:42 | D | + finished calculating the original outputs, ram usage: 18.6 +25-08-28 10:03:42 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:03:42 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:03:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:03:42 | D | - sum error = [ 8111.0113, 8087.7465, 8079.1198, 8088.2601, 8085.4031] +25-08-28 10:03:42 | D | - best error = [ 8111.0113, 8087.7465, 8079.1198, 8079.1198, 8079.1198] +25-08-28 10:03:42 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:03:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:03:42 | D | - sum error = [ 8095.2255, 8116.2011, 8144.5201, 8193.6336, 8238.5120] +25-08-28 10:03:42 | D | - best error = [ 8079.1198, 8079.1198, 8079.1198, 8079.1198, 8079.1198] +25-08-28 10:03:42 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:03:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:03:42 | D | - sum error = [ 8296.2343, 8334.2637, 8408.5671, 8496.4637, 8593.4254] +25-08-28 10:03:42 | D | - best error = [ 8079.1198, 8079.1198, 8079.1198, 8079.1198, 8079.1198] +25-08-28 10:03:42 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:03:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:03:42 | D | - sum error = [ 8681.3345, 8772.2991, 8883.1738, 8984.1803, 9108.9042] +25-08-28 10:03:42 | D | - best error = [ 8079.1198, 8079.1198, 8079.1198, 8079.1198, 8079.1198] +25-08-28 10:03:42 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:03:42 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:03:42 | D | - sum error = [ 8448.6633, 8403.8510, 8377.0835, 8348.5314, 8341.9331] +25-08-28 10:03:42 | D | - best error = [ 8079.1198, 8079.1198, 8079.1198, 8079.1198, 8079.1198] +25-08-28 10:03:42 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:03:42 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:03:42 | D | - sum error = [ 8327.1455, 8321.5076, 8338.7102, 8358.4329, 8386.8707] +25-08-28 10:03:42 | D | - best error = [ 8079.1198, 8079.1198, 8079.1198, 8079.1198, 8079.1198] +25-08-28 10:03:42 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:03:42 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:03:42 | D | - sum error = [ 8439.1559, 8475.0467, 8527.5173, 8603.6904, 8685.6332] +25-08-28 10:03:42 | D | - best error = [ 8079.1198, 8079.1198, 8079.1198, 8079.1198, 8079.1198] +25-08-28 10:03:42 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:03:42 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:03:42 | D | - sum error = [ 8775.2899, 8888.7038, 8991.7923, 9098.4038] +25-08-28 10:03:42 | D | - best error = [ 8079.1198, 8079.1198, 8079.1198, 8079.1198] +25-08-28 10:03:42 | D | + error = 8079.1198 +25-08-28 10:03:42 | D | + scale = [min=1.0513, max=1.3060] +25-08-28 10:03:42 | D | - single_transformer_blocks.0.down_proj +25-08-28 10:03:42 | D | + w: sint4 +25-08-28 10:03:42 | D | + x: uint4 +25-08-28 10:03:42 | D | + y: None +25-08-28 10:03:42 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:03:42 | D | + finished parsing calibration arguments, ram usage: 18.6 +25-08-28 10:03:44 | D | + x - AbsMax +25-08-28 10:03:44 | D | + x = [min=1.9609, max=22.8750] +25-08-28 10:03:44 | D | + w - AbsMax +25-08-28 10:03:44 | D | + w = [min=0.1147, max=1.3672] +25-08-28 10:03:44 | D | + finished resetting calibrator, ram usage: 18.6 +25-08-28 10:03:46 | D | + finished calculating the original outputs, ram usage: 18.6 +25-08-28 10:05:42 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:05:42 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:05:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:05:42 | D | - sum error = [ 9299.0511, 9289.8284, 9305.8154, 9327.7993, 9358.5020] +25-08-28 10:05:42 | D | - best error = [ 9299.0511, 9289.8284, 9289.8284, 9289.8284, 9289.8284] +25-08-28 10:05:42 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:05:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:05:42 | D | - sum error = [ 9409.0233, 9451.9056, 9525.3657, 9626.2325, 9686.4421] +25-08-28 10:05:42 | D | - best error = [ 9289.8284, 9289.8284, 9289.8284, 9289.8284, 9289.8284] +25-08-28 10:05:42 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:05:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:05:42 | D | - sum error = [ 9770.6569, 9878.5460, 10012.8661, 10160.5453, 10306.4617] +25-08-28 10:05:42 | D | - best error = [ 9289.8284, 9289.8284, 9289.8284, 9289.8284, 9289.8284] +25-08-28 10:05:42 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:05:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:05:42 | D | - sum error = [10423.7322, 10557.9555, 10723.5233, 10897.9679, 11109.6819] +25-08-28 10:05:42 | D | - best error = [ 9289.8284, 9289.8284, 9289.8284, 9289.8284, 9289.8284] +25-08-28 10:05:42 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:05:42 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:05:42 | D | - sum error = [10195.5492, 10004.3551, 9848.3725, 9695.4913, 9594.9749] +25-08-28 10:05:42 | D | - best error = [ 9289.8284, 9289.8284, 9289.8284, 9289.8284, 9289.8284] +25-08-28 10:05:42 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:05:42 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:05:42 | D | - sum error = [ 9510.4006, 9407.6199, 9353.6474, 9384.8376, 9411.4330] +25-08-28 10:05:42 | D | - best error = [ 9289.8284, 9289.8284, 9289.8284, 9289.8284, 9289.8284] +25-08-28 10:05:42 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:05:42 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:05:42 | D | - sum error = [ 9460.6651, 9564.4943, 9687.0701, 9815.5974, 10032.2919] +25-08-28 10:05:42 | D | - best error = [ 9289.8284, 9289.8284, 9289.8284, 9289.8284, 9289.8284] +25-08-28 10:05:42 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:05:42 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:05:42 | D | - sum error = [10237.9580, 10445.0884, 10712.9779, 10980.1097] +25-08-28 10:05:42 | D | - best error = [ 9289.8284, 9289.8284, 9289.8284, 9289.8284] +25-08-28 10:05:42 | D | + error = 9289.8284 +25-08-28 10:05:42 | D | + scale = [min=1.0342, max=1.1694] +25-08-28 10:06:02 | D | - Smoothing Diffusion Block single_transformer_blocks.1 +25-08-28 10:06:02 | D | - Skipping Module single_transformer_blocks.1.norm.linear +25-08-28 10:06:02 | D | - Smoothing Transformer Block single_transformer_blocks.1 +25-08-28 10:06:02 | D | - single_transformer_blocks.1.attn.qkv_proj + single_transformer_blocks.1.up_proj +25-08-28 10:06:02 | D | + w: sint4 +25-08-28 10:06:02 | D | + x: sint4 +25-08-28 10:06:02 | D | + y: None +25-08-28 10:06:02 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:06:02 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:06:02 | D | + x - AbsMax +25-08-28 10:06:02 | D | + x = [min=0.2471, max=36.0000] +25-08-28 10:06:02 | D | + w - AbsMax +25-08-28 10:06:02 | D | + w = [min=0.0977, max=0.5117] +25-08-28 10:06:02 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:06:03 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:08:08 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:08:08 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:08:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:08:08 | D | - sum error = [20488.2679, 19908.3010, 19773.6382, 19264.2915, 19115.1994] +25-08-28 10:08:08 | D | - best error = [20488.2679, 19908.3010, 19773.6382, 19264.2915, 19115.1994] +25-08-28 10:08:08 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:08:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:08:08 | D | - sum error = [19418.6640, 18730.7320, 18806.8362, 18727.9598, 18758.7155] +25-08-28 10:08:08 | D | - best error = [19115.1994, 18730.7320, 18730.7320, 18727.9598, 18727.9598] +25-08-28 10:08:08 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:08:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:08:08 | D | - sum error = [18926.0937, 19457.6677, 18710.2855, 18950.6559, 19329.9759] +25-08-28 10:08:08 | D | - best error = [18727.9598, 18727.9598, 18710.2855, 18710.2855, 18710.2855] +25-08-28 10:08:08 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:08:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:08:08 | D | - sum error = [19437.1586, 19665.7729, 19973.2197, 20675.9656, 21006.8949] +25-08-28 10:08:08 | D | - best error = [18710.2855, 18710.2855, 18710.2855, 18710.2855, 18710.2855] +25-08-28 10:08:08 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:08:08 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:08:08 | D | - sum error = [25809.5176, 25219.7307, 24531.4049, 23276.1344, 22810.8564] +25-08-28 10:08:08 | D | - best error = [18710.2855, 18710.2855, 18710.2855, 18710.2855, 18710.2855] +25-08-28 10:08:08 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:08:08 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:08:08 | D | - sum error = [22153.4900, 21456.8322, 21012.7184, 20621.5064, 20274.7307] +25-08-28 10:08:08 | D | - best error = [18710.2855, 18710.2855, 18710.2855, 18710.2855, 18710.2855] +25-08-28 10:08:08 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:08:08 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:08:08 | D | - sum error = [19918.3196, 19786.6824, 19650.6676, 19753.3252, 19966.6522] +25-08-28 10:08:08 | D | - best error = [18710.2855, 18710.2855, 18710.2855, 18710.2855, 18710.2855] +25-08-28 10:08:08 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:08:08 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:08:08 | D | - sum error = [20207.0111, 20366.4689, 20733.5824, 20949.0223] +25-08-28 10:08:08 | D | - best error = [18710.2855, 18710.2855, 18710.2855, 18710.2855] +25-08-28 10:08:08 | D | + error = 18710.2855 +25-08-28 10:08:08 | D | + scale = [min=0.4322, max=8.5858] +25-08-28 10:08:08 | D | - single_transformer_blocks.1.attn.out_proj +25-08-28 10:08:08 | D | + w: sint4 +25-08-28 10:08:08 | D | + x: sint4 +25-08-28 10:08:08 | D | + y: None +25-08-28 10:08:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:08:08 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:08:09 | D | + x - AbsMax +25-08-28 10:08:09 | D | + x = [min=2.1250, max=19.5000] +25-08-28 10:08:09 | D | + w - AbsMax +25-08-28 10:08:09 | D | + w = [min=0.1172, max=0.5586] +25-08-28 10:08:09 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:08:10 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:09:07 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:09:07 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:09:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:09:07 | D | - sum error = [ 7002.2671, 6989.8638, 6972.1245, 6963.4761, 6971.9458] +25-08-28 10:09:07 | D | - best error = [ 7002.2671, 6989.8638, 6972.1245, 6963.4761, 6963.4761] +25-08-28 10:09:07 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:09:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:09:07 | D | - sum error = [ 6983.2617, 7014.9324, 7055.0748, 7091.7090, 7145.3627] +25-08-28 10:09:07 | D | - best error = [ 6963.4761, 6963.4761, 6963.4761, 6963.4761, 6963.4761] +25-08-28 10:09:07 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:09:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:09:07 | D | - sum error = [ 7198.3798, 7269.2998, 7339.6559, 7417.8309, 7514.6321] +25-08-28 10:09:07 | D | - best error = [ 6963.4761, 6963.4761, 6963.4761, 6963.4761, 6963.4761] +25-08-28 10:09:07 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:09:07 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:09:07 | D | - sum error = [ 7619.4236, 7723.4863, 7825.2494, 7965.1403, 8110.2565] +25-08-28 10:09:07 | D | - best error = [ 6963.4761, 6963.4761, 6963.4761, 6963.4761, 6963.4761] +25-08-28 10:09:07 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:09:07 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:09:07 | D | - sum error = [ 7357.6233, 7304.7645, 7271.1798, 7232.5691, 7220.2521] +25-08-28 10:09:07 | D | - best error = [ 6963.4761, 6963.4761, 6963.4761, 6963.4761, 6963.4761] +25-08-28 10:09:07 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:09:07 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:09:07 | D | - sum error = [ 7209.3728, 7225.1289, 7235.4588, 7256.1260, 7297.3713] +25-08-28 10:09:07 | D | - best error = [ 6963.4761, 6963.4761, 6963.4761, 6963.4761, 6963.4761] +25-08-28 10:09:07 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:09:07 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:09:07 | D | - sum error = [ 7352.6977, 7402.4015, 7470.1815, 7556.8528, 7647.4404] +25-08-28 10:09:07 | D | - best error = [ 6963.4761, 6963.4761, 6963.4761, 6963.4761, 6963.4761] +25-08-28 10:09:07 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:09:07 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:09:07 | D | - sum error = [ 7742.2173, 7850.0232, 7965.6088, 8112.7887] +25-08-28 10:09:07 | D | - best error = [ 6963.4761, 6963.4761, 6963.4761, 6963.4761] +25-08-28 10:09:07 | D | + error = 6963.4761 +25-08-28 10:09:07 | D | + scale = [min=1.1197, max=1.5614] +25-08-28 10:09:08 | D | - single_transformer_blocks.1.down_proj +25-08-28 10:09:08 | D | + w: sint4 +25-08-28 10:09:08 | D | + x: uint4 +25-08-28 10:09:08 | D | + y: None +25-08-28 10:09:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:09:08 | D | + finished parsing calibration arguments, ram usage: 18.5 +25-08-28 10:09:11 | D | + x - AbsMax +25-08-28 10:09:11 | D | + x = [min=2.1562, max=18.5000] +25-08-28 10:09:11 | D | + w - AbsMax +25-08-28 10:09:11 | D | + w = [min=0.1138, max=1.5000] +25-08-28 10:09:11 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:09:13 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:11:13 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:11:13 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:11:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:11:13 | D | - sum error = [ 9379.5782, 9403.5415, 9423.3001, 9458.7301, 9502.8392] +25-08-28 10:11:13 | D | - best error = [ 9379.5782, 9379.5782, 9379.5782, 9379.5782, 9379.5782] +25-08-28 10:11:13 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:11:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:11:13 | D | - sum error = [ 9573.3296, 9643.8752, 9722.7881, 9799.6518, 9919.7694] +25-08-28 10:11:13 | D | - best error = [ 9379.5782, 9379.5782, 9379.5782, 9379.5782, 9379.5782] +25-08-28 10:11:13 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:11:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:11:13 | D | - sum error = [10047.6779, 10177.3690, 10330.5012, 10449.2492, 10599.5419] +25-08-28 10:11:13 | D | - best error = [ 9379.5782, 9379.5782, 9379.5782, 9379.5782, 9379.5782] +25-08-28 10:11:13 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:11:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:11:13 | D | - sum error = [10795.1019, 10977.5290, 11188.6489, 11390.4971, 11642.8464] +25-08-28 10:11:13 | D | - best error = [ 9379.5782, 9379.5782, 9379.5782, 9379.5782, 9379.5782] +25-08-28 10:11:13 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:11:13 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:11:13 | D | - sum error = [ 9887.3390, 9751.1316, 9611.5818, 9480.7671, 9480.3312] +25-08-28 10:11:13 | D | - best error = [ 9379.5782, 9379.5782, 9379.5782, 9379.5782, 9379.5782] +25-08-28 10:11:13 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:11:13 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:11:13 | D | - sum error = [ 9446.6108, 9395.4785, 9395.3231, 9440.9457, 9512.3066] +25-08-28 10:11:13 | D | - best error = [ 9379.5782, 9379.5782, 9379.5782, 9379.5782, 9379.5782] +25-08-28 10:11:13 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:11:13 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:11:13 | D | - sum error = [ 9557.2787, 9683.1226, 9814.5688, 10031.0757, 10260.5138] +25-08-28 10:11:13 | D | - best error = [ 9379.5782, 9379.5782, 9379.5782, 9379.5782, 9379.5782] +25-08-28 10:11:13 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:11:13 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:11:13 | D | - sum error = [10533.7522, 10777.0868, 11182.6283, 11511.0128] +25-08-28 10:11:13 | D | - best error = [ 9379.5782, 9379.5782, 9379.5782, 9379.5782] +25-08-28 10:11:13 | D | + error = 9379.5782 +25-08-28 10:11:13 | D | + scale = [min=1.0000, max=1.0000] +25-08-28 10:11:34 | D | - Smoothing Diffusion Block single_transformer_blocks.2 +25-08-28 10:11:34 | D | - Skipping Module single_transformer_blocks.2.norm.linear +25-08-28 10:11:34 | D | - Smoothing Transformer Block single_transformer_blocks.2 +25-08-28 10:11:34 | D | - single_transformer_blocks.2.attn.qkv_proj + single_transformer_blocks.2.up_proj +25-08-28 10:11:34 | D | + w: sint4 +25-08-28 10:11:34 | D | + x: sint4 +25-08-28 10:11:34 | D | + y: None +25-08-28 10:11:34 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:11:34 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:11:34 | D | + x - AbsMax +25-08-28 10:11:34 | D | + x = [min=0.2305, max=25.0000] +25-08-28 10:11:34 | D | + w - AbsMax +25-08-28 10:11:34 | D | + w = [min=0.0977, max=0.5234] +25-08-28 10:11:34 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:11:36 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:13:45 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:13:45 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:13:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:13:45 | D | - sum error = [23445.3455, 22921.7458, 22671.6111, 22541.6983, 22355.7377] +25-08-28 10:13:45 | D | - best error = [23445.3455, 22921.7458, 22671.6111, 22541.6983, 22355.7377] +25-08-28 10:13:45 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:13:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:13:45 | D | - sum error = [21919.1248, 21895.7801, 21795.5335, 21883.4016, 21842.2468] +25-08-28 10:13:45 | D | - best error = [21919.1248, 21895.7801, 21795.5335, 21795.5335, 21795.5335] +25-08-28 10:13:45 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:13:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:13:45 | D | - sum error = [22070.3225, 22245.5626, 22191.3035, 22148.5631, 22549.6430] +25-08-28 10:13:45 | D | - best error = [21795.5335, 21795.5335, 21795.5335, 21795.5335, 21795.5335] +25-08-28 10:13:45 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:13:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:13:45 | D | - sum error = [23030.6418, 23522.2774, 24157.9125, 24395.9026, 24651.8919] +25-08-28 10:13:45 | D | - best error = [21795.5335, 21795.5335, 21795.5335, 21795.5335, 21795.5335] +25-08-28 10:13:45 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:13:45 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:13:45 | D | - sum error = [28095.0786, 27269.1955, 26660.3502, 26178.9155, 25681.2249] +25-08-28 10:13:45 | D | - best error = [21795.5335, 21795.5335, 21795.5335, 21795.5335, 21795.5335] +25-08-28 10:13:45 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:13:45 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:13:45 | D | - sum error = [25037.0746, 24707.5088, 24644.2711, 24046.0358, 24073.7201] +25-08-28 10:13:45 | D | - best error = [21795.5335, 21795.5335, 21795.5335, 21795.5335, 21795.5335] +25-08-28 10:13:45 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:13:45 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:13:45 | D | - sum error = [23891.3005, 23610.6444, 23909.0035, 23829.6326, 23539.5905] +25-08-28 10:13:45 | D | - best error = [21795.5335, 21795.5335, 21795.5335, 21795.5335, 21795.5335] +25-08-28 10:13:45 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:13:45 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:13:45 | D | - sum error = [23561.5782, 23987.0365, 24510.7336, 24947.6200] +25-08-28 10:13:45 | D | - best error = [21795.5335, 21795.5335, 21795.5335, 21795.5335] +25-08-28 10:13:45 | D | + error = 21795.5335 +25-08-28 10:13:45 | D | + scale = [min=0.5983, max=3.0852] +25-08-28 10:13:46 | D | - single_transformer_blocks.2.attn.out_proj +25-08-28 10:13:46 | D | + w: sint4 +25-08-28 10:13:46 | D | + x: sint4 +25-08-28 10:13:46 | D | + y: None +25-08-28 10:13:46 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:13:46 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:13:47 | D | + x - AbsMax +25-08-28 10:13:47 | D | + x = [min=2.4531, max=15.0000] +25-08-28 10:13:47 | D | + w - AbsMax +25-08-28 10:13:47 | D | + w = [min=0.1182, max=0.3105] +25-08-28 10:13:47 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:13:48 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:14:49 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:14:49 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:14:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:14:49 | D | - sum error = [ 7930.2862, 7913.6788, 7897.4127, 7893.0643, 7883.4807] +25-08-28 10:14:49 | D | - best error = [ 7930.2862, 7913.6788, 7897.4127, 7893.0643, 7883.4807] +25-08-28 10:14:49 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:14:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:14:49 | D | - sum error = [ 7882.1878, 7898.5556, 7918.8721, 7948.7189, 7977.4008] +25-08-28 10:14:49 | D | - best error = [ 7882.1878, 7882.1878, 7882.1878, 7882.1878, 7882.1878] +25-08-28 10:14:49 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:14:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:14:49 | D | - sum error = [ 8006.5433, 8055.4707, 8114.0067, 8160.8286, 8219.0934] +25-08-28 10:14:49 | D | - best error = [ 7882.1878, 7882.1878, 7882.1878, 7882.1878, 7882.1878] +25-08-28 10:14:49 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:14:49 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:14:49 | D | - sum error = [ 8277.9944, 8341.6042, 8436.2380, 8533.1279, 8612.1737] +25-08-28 10:14:49 | D | - best error = [ 7882.1878, 7882.1878, 7882.1878, 7882.1878, 7882.1878] +25-08-28 10:14:49 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:14:49 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:14:49 | D | - sum error = [ 8243.8263, 8201.8027, 8155.1348, 8119.8635, 8085.6742] +25-08-28 10:14:49 | D | - best error = [ 7882.1878, 7882.1878, 7882.1878, 7882.1878, 7882.1878] +25-08-28 10:14:49 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:14:49 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:14:49 | D | - sum error = [ 8071.9931, 8082.1170, 8077.7361, 8068.8780, 8094.4099] +25-08-28 10:14:49 | D | - best error = [ 7882.1878, 7882.1878, 7882.1878, 7882.1878, 7882.1878] +25-08-28 10:14:49 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:14:49 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:14:49 | D | - sum error = [ 8122.4403, 8150.4562, 8196.1201, 8241.5926, 8302.4796] +25-08-28 10:14:49 | D | - best error = [ 7882.1878, 7882.1878, 7882.1878, 7882.1878, 7882.1878] +25-08-28 10:14:49 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:14:49 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:14:49 | D | - sum error = [ 8364.8154, 8438.9563, 8518.9436, 8609.2230] +25-08-28 10:14:49 | D | - best error = [ 7882.1878, 7882.1878, 7882.1878, 7882.1878] +25-08-28 10:14:49 | D | + error = 7882.1878 +25-08-28 10:14:49 | D | + scale = [min=1.2515, max=1.9680] +25-08-28 10:14:49 | D | - single_transformer_blocks.2.down_proj +25-08-28 10:14:49 | D | + w: sint4 +25-08-28 10:14:49 | D | + x: uint4 +25-08-28 10:14:49 | D | + y: None +25-08-28 10:14:49 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:14:49 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:14:52 | D | + x - AbsMax +25-08-28 10:14:52 | D | + x = [min=2.2188, max=18.7500] +25-08-28 10:14:52 | D | + w - AbsMax +25-08-28 10:14:52 | D | + w = [min=0.1147, max=1.4609] +25-08-28 10:14:52 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:14:54 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:16:54 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:16:54 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:16:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:16:54 | D | - sum error = [ 9009.3729, 9055.0039, 9090.2638, 9103.5606, 9165.3639] +25-08-28 10:16:54 | D | - best error = [ 9009.3729, 9009.3729, 9009.3729, 9009.3729, 9009.3729] +25-08-28 10:16:54 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:16:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:16:54 | D | - sum error = [ 9233.7680, 9296.1226, 9386.2811, 9514.6456, 9587.9061] +25-08-28 10:16:54 | D | - best error = [ 9009.3729, 9009.3729, 9009.3729, 9009.3729, 9009.3729] +25-08-28 10:16:54 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:16:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:16:54 | D | - sum error = [ 9712.2685, 9890.4867, 10020.3844, 10184.5609, 10358.3879] +25-08-28 10:16:54 | D | - best error = [ 9009.3729, 9009.3729, 9009.3729, 9009.3729, 9009.3729] +25-08-28 10:16:54 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:16:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:16:54 | D | - sum error = [10537.7220, 10769.3937, 10963.6220, 11216.5635, 11486.6124] +25-08-28 10:16:54 | D | - best error = [ 9009.3729, 9009.3729, 9009.3729, 9009.3729, 9009.3729] +25-08-28 10:16:54 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:16:54 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:16:54 | D | - sum error = [ 9668.7674, 9474.1442, 9343.5655, 9236.2304, 9171.8046] +25-08-28 10:16:54 | D | - best error = [ 9009.3729, 9009.3729, 9009.3729, 9009.3729, 9009.3729] +25-08-28 10:16:54 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:16:54 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:16:54 | D | - sum error = [ 9120.4088, 9068.4698, 9050.7874, 9070.0761, 9152.2177] +25-08-28 10:16:54 | D | - best error = [ 9009.3729, 9009.3729, 9009.3729, 9009.3729, 9009.3729] +25-08-28 10:16:54 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:16:54 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:16:54 | D | - sum error = [ 9287.1533, 9447.8175, 9602.2789, 9848.6056, 10087.6499] +25-08-28 10:16:54 | D | - best error = [ 9009.3729, 9009.3729, 9009.3729, 9009.3729, 9009.3729] +25-08-28 10:16:54 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:16:54 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:16:54 | D | - sum error = [10303.9107, 10635.8842, 10964.4294, 11325.1907] +25-08-28 10:16:54 | D | - best error = [ 9009.3729, 9009.3729, 9009.3729, 9009.3729] +25-08-28 10:16:54 | D | + error = 9009.3729 +25-08-28 10:16:54 | D | + scale = [min=1.0000, max=1.0000] +25-08-28 10:17:15 | D | - Smoothing Diffusion Block single_transformer_blocks.3 +25-08-28 10:17:15 | D | - Skipping Module single_transformer_blocks.3.norm.linear +25-08-28 10:17:15 | D | - Smoothing Transformer Block single_transformer_blocks.3 +25-08-28 10:17:15 | D | - single_transformer_blocks.3.attn.qkv_proj + single_transformer_blocks.3.up_proj +25-08-28 10:17:15 | D | + w: sint4 +25-08-28 10:17:15 | D | + x: sint4 +25-08-28 10:17:15 | D | + y: None +25-08-28 10:17:15 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:17:15 | D | + finished parsing calibration arguments, ram usage: 16.5 +25-08-28 10:17:15 | D | + x - AbsMax +25-08-28 10:17:15 | D | + x = [min=0.1768, max=31.6250] +25-08-28 10:17:15 | D | + w - AbsMax +25-08-28 10:17:15 | D | + w = [min=0.0913, max=0.5859] +25-08-28 10:17:15 | D | + finished resetting calibrator, ram usage: 16.5 +25-08-28 10:17:17 | D | + finished calculating the original outputs, ram usage: 16.7 +25-08-28 10:19:21 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:19:21 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:19:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:19:21 | D | - sum error = [26137.1607, 25707.5317, 25141.3362, 25646.0112, 24472.9925] +25-08-28 10:19:21 | D | - best error = [26137.1607, 25707.5317, 25141.3362, 25141.3362, 24472.9925] +25-08-28 10:19:21 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:19:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:19:21 | D | - sum error = [23883.2279, 23575.6525, 23409.7416, 23215.0117, 23212.4904] +25-08-28 10:19:21 | D | - best error = [23883.2279, 23575.6525, 23409.7416, 23215.0117, 23212.4904] +25-08-28 10:19:21 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:19:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:19:21 | D | - sum error = [23295.3654, 23522.4747, 24230.7415, 23635.3457, 23479.4712] +25-08-28 10:19:21 | D | - best error = [23212.4904, 23212.4904, 23212.4904, 23212.4904, 23212.4904] +25-08-28 10:19:21 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:19:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:19:21 | D | - sum error = [23888.3845, 24500.1002, 24427.1390, 25102.0726, 25545.4043] +25-08-28 10:19:21 | D | - best error = [23212.4904, 23212.4904, 23212.4904, 23212.4904, 23212.4904] +25-08-28 10:19:21 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:19:21 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:19:21 | D | - sum error = [30345.1906, 29270.7537, 28360.6082, 27693.2049, 26860.0331] +25-08-28 10:19:21 | D | - best error = [23212.4904, 23212.4904, 23212.4904, 23212.4904, 23212.4904] +25-08-28 10:19:21 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:19:21 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:19:21 | D | - sum error = [26382.0703, 25919.7753, 25521.7078, 25103.0465, 24934.9336] +25-08-28 10:19:21 | D | - best error = [23212.4904, 23212.4904, 23212.4904, 23212.4904, 23212.4904] +25-08-28 10:19:21 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:19:21 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:19:21 | D | - sum error = [25039.2511, 24570.8888, 24567.6236, 24212.3420, 24024.2358] +25-08-28 10:19:21 | D | - best error = [23212.4904, 23212.4904, 23212.4904, 23212.4904, 23212.4904] +25-08-28 10:19:21 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:19:21 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:19:21 | D | - sum error = [24236.6915, 24314.9139, 24833.6816, 25571.4165] +25-08-28 10:19:21 | D | - best error = [23212.4904, 23212.4904, 23212.4904, 23212.4904] +25-08-28 10:19:21 | D | + error = 23212.4904 +25-08-28 10:19:21 | D | + scale = [min=0.4585, max=4.7317] +25-08-28 10:19:21 | D | - single_transformer_blocks.3.attn.out_proj +25-08-28 10:19:21 | D | + w: sint4 +25-08-28 10:19:21 | D | + x: sint4 +25-08-28 10:19:21 | D | + y: None +25-08-28 10:19:21 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:19:21 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:19:22 | D | + x - AbsMax +25-08-28 10:19:22 | D | + x = [min=2.1094, max=14.8125] +25-08-28 10:19:22 | D | + w - AbsMax +25-08-28 10:19:22 | D | + w = [min=0.1177, max=0.2949] +25-08-28 10:19:22 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:19:23 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:20:23 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:20:23 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:20:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:20:23 | D | - sum error = [ 7420.0526, 7397.0767, 7378.7148, 7375.1363, 7378.8747] +25-08-28 10:20:23 | D | - best error = [ 7420.0526, 7397.0767, 7378.7148, 7375.1363, 7375.1363] +25-08-28 10:20:23 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:20:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:20:23 | D | - sum error = [ 7386.8665, 7404.6245, 7412.0742, 7430.8207, 7477.3995] +25-08-28 10:20:23 | D | - best error = [ 7375.1363, 7375.1363, 7375.1363, 7375.1363, 7375.1363] +25-08-28 10:20:23 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:20:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:20:23 | D | - sum error = [ 7506.9877, 7552.9298, 7598.6146, 7664.1743, 7718.9305] +25-08-28 10:20:23 | D | - best error = [ 7375.1363, 7375.1363, 7375.1363, 7375.1363, 7375.1363] +25-08-28 10:20:23 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:20:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:20:23 | D | - sum error = [ 7783.6557, 7856.4590, 7946.6491, 8040.1383, 8122.0286] +25-08-28 10:20:23 | D | - best error = [ 7375.1363, 7375.1363, 7375.1363, 7375.1363, 7375.1363] +25-08-28 10:20:23 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:20:23 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:20:23 | D | - sum error = [ 7684.1465, 7640.1123, 7602.1300, 7590.3128, 7561.5144] +25-08-28 10:20:23 | D | - best error = [ 7375.1363, 7375.1363, 7375.1363, 7375.1363, 7375.1363] +25-08-28 10:20:23 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:20:23 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:20:23 | D | - sum error = [ 7544.4926, 7543.0007, 7548.7787, 7564.1240, 7573.1050] +25-08-28 10:20:23 | D | - best error = [ 7375.1363, 7375.1363, 7375.1363, 7375.1363, 7375.1363] +25-08-28 10:20:23 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:20:23 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:20:23 | D | - sum error = [ 7614.6795, 7646.5498, 7685.7942, 7746.9114, 7815.0606] +25-08-28 10:20:23 | D | - best error = [ 7375.1363, 7375.1363, 7375.1363, 7375.1363, 7375.1363] +25-08-28 10:20:23 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:20:23 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:20:23 | D | - sum error = [ 7873.7950, 7948.5116, 8033.3637, 8118.7033] +25-08-28 10:20:23 | D | - best error = [ 7375.1363, 7375.1363, 7375.1363, 7375.1363] +25-08-28 10:20:23 | D | + error = 7375.1363 +25-08-28 10:20:23 | D | + scale = [min=1.1185, max=1.4983] +25-08-28 10:20:23 | D | - single_transformer_blocks.3.down_proj +25-08-28 10:20:23 | D | + w: sint4 +25-08-28 10:20:23 | D | + x: uint4 +25-08-28 10:20:23 | D | + y: None +25-08-28 10:20:23 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:20:23 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:20:26 | D | + x - AbsMax +25-08-28 10:20:26 | D | + x = [min=1.8516, max=19.3750] +25-08-28 10:20:26 | D | + w - AbsMax +25-08-28 10:20:26 | D | + w = [min=0.1152, max=0.9180] +25-08-28 10:20:26 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:20:28 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:22:31 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:22:31 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:22:31 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:22:31 | D | - sum error = [ 8678.0357, 8750.9487, 8781.8750, 8848.1560, 8861.8951] +25-08-28 10:22:31 | D | - best error = [ 8678.0357, 8678.0357, 8678.0357, 8678.0357, 8678.0357] +25-08-28 10:22:31 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:22:31 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:22:31 | D | - sum error = [ 8956.5350, 9029.5200, 9135.2224, 9253.5515, 9427.3419] +25-08-28 10:22:31 | D | - best error = [ 8678.0357, 8678.0357, 8678.0357, 8678.0357, 8678.0357] +25-08-28 10:22:31 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:22:31 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:22:31 | D | - sum error = [ 9554.0756, 9694.7252, 9829.9713, 10036.3579, 10271.2944] +25-08-28 10:22:31 | D | - best error = [ 8678.0357, 8678.0357, 8678.0357, 8678.0357, 8678.0357] +25-08-28 10:22:31 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:22:31 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:22:31 | D | - sum error = [10481.1702, 10716.3651, 10972.5669, 11214.4114, 11485.8041] +25-08-28 10:22:31 | D | - best error = [ 8678.0357, 8678.0357, 8678.0357, 8678.0357, 8678.0357] +25-08-28 10:22:31 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:22:31 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:22:31 | D | - sum error = [ 9073.7432, 8945.5625, 8838.2157, 8742.7511, 8663.5392] +25-08-28 10:22:31 | D | - best error = [ 8678.0357, 8678.0357, 8678.0357, 8678.0357, 8663.5392] +25-08-28 10:22:31 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:22:31 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:22:31 | D | - sum error = [ 8624.9239, 8657.8478, 8718.9908, 8779.2401, 8828.9931] +25-08-28 10:22:31 | D | - best error = [ 8624.9239, 8624.9239, 8624.9239, 8624.9239, 8624.9239] +25-08-28 10:22:31 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:22:31 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:22:31 | D | - sum error = [ 8979.3982, 9139.9160, 9378.1713, 9551.0946, 9870.1810] +25-08-28 10:22:31 | D | - best error = [ 8624.9239, 8624.9239, 8624.9239, 8624.9239, 8624.9239] +25-08-28 10:22:31 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:22:31 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:22:31 | D | - sum error = [10168.0964, 10525.6065, 10927.1486, 11291.5877] +25-08-28 10:22:31 | D | - best error = [ 8624.9239, 8624.9239, 8624.9239, 8624.9239] +25-08-28 10:22:31 | D | + error = 8624.9239 +25-08-28 10:22:31 | D | + scale = [min=1.6388, max=7.6343] +25-08-28 10:22:52 | D | - Smoothing Diffusion Block single_transformer_blocks.4 +25-08-28 10:22:52 | D | - Skipping Module single_transformer_blocks.4.norm.linear +25-08-28 10:22:52 | D | - Smoothing Transformer Block single_transformer_blocks.4 +25-08-28 10:22:52 | D | - single_transformer_blocks.4.attn.qkv_proj + single_transformer_blocks.4.up_proj +25-08-28 10:22:52 | D | + w: sint4 +25-08-28 10:22:52 | D | + x: sint4 +25-08-28 10:22:52 | D | + y: None +25-08-28 10:22:52 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:22:52 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:22:53 | D | + x - AbsMax +25-08-28 10:22:53 | D | + x = [min=0.1226, max=28.5000] +25-08-28 10:22:53 | D | + w - AbsMax +25-08-28 10:22:53 | D | + w = [min=0.0850, max=0.6367] +25-08-28 10:22:53 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:22:54 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:25:00 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:25:00 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:25:00 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:25:00 | D | - sum error = [26939.7811, 26309.0555, 26038.4639, 25430.3353, 25192.8129] +25-08-28 10:25:00 | D | - best error = [26939.7811, 26309.0555, 26038.4639, 25430.3353, 25192.8129] +25-08-28 10:25:00 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:25:00 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:25:00 | D | - sum error = [25080.8025, 24969.1752, 24913.0520, 24769.2790, 24475.0649] +25-08-28 10:25:00 | D | - best error = [25080.8025, 24969.1752, 24913.0520, 24769.2790, 24475.0649] +25-08-28 10:25:00 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:25:00 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:25:00 | D | - sum error = [24569.8417, 24472.4773, 25372.3743, 25137.9401, 25221.2177] +25-08-28 10:25:00 | D | - best error = [24475.0649, 24472.4773, 24472.4773, 24472.4773, 24472.4773] +25-08-28 10:25:00 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:25:00 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:25:00 | D | - sum error = [25371.4473, 25795.2814, 25970.9130, 26366.4611, 26715.7889] +25-08-28 10:25:00 | D | - best error = [24472.4773, 24472.4773, 24472.4773, 24472.4773, 24472.4773] +25-08-28 10:25:00 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:25:00 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:25:00 | D | - sum error = [34298.2644, 33048.3244, 31843.9575, 30757.8787, 30035.5033] +25-08-28 10:25:00 | D | - best error = [24472.4773, 24472.4773, 24472.4773, 24472.4773, 24472.4773] +25-08-28 10:25:00 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:25:00 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:25:00 | D | - sum error = [29300.0008, 28702.4637, 28218.6276, 27662.3261, 27304.1254] +25-08-28 10:25:00 | D | - best error = [24472.4773, 24472.4773, 24472.4773, 24472.4773, 24472.4773] +25-08-28 10:25:00 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:25:00 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:25:00 | D | - sum error = [26861.4567, 26803.5269, 26572.7138, 26656.9424, 26457.6316] +25-08-28 10:25:00 | D | - best error = [24472.4773, 24472.4773, 24472.4773, 24472.4773, 24472.4773] +25-08-28 10:25:00 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:25:00 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:25:00 | D | - sum error = [27211.1001, 26679.1676, 26644.3228, 26941.2623] +25-08-28 10:25:00 | D | - best error = [24472.4773, 24472.4773, 24472.4773, 24472.4773] +25-08-28 10:25:00 | D | + error = 24472.4773 +25-08-28 10:25:00 | D | + scale = [min=0.3152, max=6.3120] +25-08-28 10:25:01 | D | - single_transformer_blocks.4.attn.out_proj +25-08-28 10:25:01 | D | + w: sint4 +25-08-28 10:25:01 | D | + x: sint4 +25-08-28 10:25:01 | D | + y: None +25-08-28 10:25:01 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:25:01 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:25:02 | D | + x - AbsMax +25-08-28 10:25:02 | D | + x = [min=1.7266, max=15.2500] +25-08-28 10:25:02 | D | + w - AbsMax +25-08-28 10:25:02 | D | + w = [min=0.1182, max=0.3574] +25-08-28 10:25:02 | D | + finished resetting calibrator, ram usage: 18.3 +25-08-28 10:25:03 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:26:03 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:26:03 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:26:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:26:03 | D | - sum error = [ 6937.8807, 6925.7756, 6913.6656, 6920.6286, 6926.6312] +25-08-28 10:26:03 | D | - best error = [ 6937.8807, 6925.7756, 6913.6656, 6913.6656, 6913.6656] +25-08-28 10:26:03 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:26:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:26:03 | D | - sum error = [ 6936.1638, 6953.6615, 6981.1108, 7007.9703, 7035.2348] +25-08-28 10:26:03 | D | - best error = [ 6913.6656, 6913.6656, 6913.6656, 6913.6656, 6913.6656] +25-08-28 10:26:03 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:26:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:26:03 | D | - sum error = [ 7070.4843, 7122.2598, 7170.4184, 7245.0444, 7306.2877] +25-08-28 10:26:03 | D | - best error = [ 6913.6656, 6913.6656, 6913.6656, 6913.6656, 6913.6656] +25-08-28 10:26:03 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:26:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:26:03 | D | - sum error = [ 7362.6892, 7444.5572, 7524.7427, 7616.8550, 7717.1648] +25-08-28 10:26:03 | D | - best error = [ 6913.6656, 6913.6656, 6913.6656, 6913.6656, 6913.6656] +25-08-28 10:26:03 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:26:03 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:26:03 | D | - sum error = [ 7293.0243, 7248.4958, 7211.6392, 7185.9621, 7159.4346] +25-08-28 10:26:03 | D | - best error = [ 6913.6656, 6913.6656, 6913.6656, 6913.6656, 6913.6656] +25-08-28 10:26:03 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:26:03 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:26:03 | D | - sum error = [ 7147.2227, 7137.3628, 7145.3873, 7159.2787, 7176.9863] +25-08-28 10:26:03 | D | - best error = [ 6913.6656, 6913.6656, 6913.6656, 6913.6656, 6913.6656] +25-08-28 10:26:03 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:26:03 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:26:03 | D | - sum error = [ 7193.0489, 7237.3512, 7280.1247, 7323.9432, 7386.0385] +25-08-28 10:26:03 | D | - best error = [ 6913.6656, 6913.6656, 6913.6656, 6913.6656, 6913.6656] +25-08-28 10:26:03 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:26:03 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:26:03 | D | - sum error = [ 7459.9488, 7540.4111, 7621.9580, 7731.0708] +25-08-28 10:26:03 | D | - best error = [ 6913.6656, 6913.6656, 6913.6656, 6913.6656] +25-08-28 10:26:03 | D | + error = 6913.6656 +25-08-28 10:26:03 | D | + scale = [min=1.0561, max=1.3132] +25-08-28 10:26:04 | D | - single_transformer_blocks.4.down_proj +25-08-28 10:26:04 | D | + w: sint4 +25-08-28 10:26:04 | D | + x: uint4 +25-08-28 10:26:04 | D | + y: None +25-08-28 10:26:04 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:26:04 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:26:07 | D | + x - AbsMax +25-08-28 10:26:07 | D | + x = [min=1.8047, max=19.8750] +25-08-28 10:26:07 | D | + w - AbsMax +25-08-28 10:26:07 | D | + w = [min=0.1143, max=1.2422] +25-08-28 10:26:07 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:26:09 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:28:12 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:28:12 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:28:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:28:12 | D | - sum error = [ 8587.2780, 8561.6017, 8624.4819, 8705.6170, 8769.3051] +25-08-28 10:28:12 | D | - best error = [ 8587.2780, 8561.6017, 8561.6017, 8561.6017, 8561.6017] +25-08-28 10:28:12 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:28:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:28:12 | D | - sum error = [ 8834.7912, 8948.2065, 8973.5754, 9068.9307, 9207.0047] +25-08-28 10:28:12 | D | - best error = [ 8561.6017, 8561.6017, 8561.6017, 8561.6017, 8561.6017] +25-08-28 10:28:12 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:28:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:28:12 | D | - sum error = [ 9339.8009, 9476.2541, 9657.9302, 9872.9570, 10030.9680] +25-08-28 10:28:12 | D | - best error = [ 8561.6017, 8561.6017, 8561.6017, 8561.6017, 8561.6017] +25-08-28 10:28:12 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:28:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:28:12 | D | - sum error = [10257.2608, 10487.2200, 10691.1043, 10969.9933, 11291.2749] +25-08-28 10:28:12 | D | - best error = [ 8561.6017, 8561.6017, 8561.6017, 8561.6017, 8561.6017] +25-08-28 10:28:12 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:28:12 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:28:12 | D | - sum error = [ 9060.2693, 8912.6616, 8772.9183, 8683.9521, 8616.9387] +25-08-28 10:28:12 | D | - best error = [ 8561.6017, 8561.6017, 8561.6017, 8561.6017, 8561.6017] +25-08-28 10:28:12 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:28:12 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:28:12 | D | - sum error = [ 8608.0167, 8580.5012, 8615.4958, 8672.6586, 8773.6884] +25-08-28 10:28:12 | D | - best error = [ 8561.6017, 8561.6017, 8561.6017, 8561.6017, 8561.6017] +25-08-28 10:28:12 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:28:12 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:28:12 | D | - sum error = [ 8903.9456, 9064.8234, 9241.0523, 9452.3491, 9729.8177] +25-08-28 10:28:12 | D | - best error = [ 8561.6017, 8561.6017, 8561.6017, 8561.6017, 8561.6017] +25-08-28 10:28:12 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:28:12 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:28:12 | D | - sum error = [ 9990.6877, 10342.6457, 10688.1164, 11105.8752] +25-08-28 10:28:12 | D | - best error = [ 8561.6017, 8561.6017, 8561.6017, 8561.6017] +25-08-28 10:28:12 | D | + error = 8561.6017 +25-08-28 10:28:12 | D | + scale = [min=1.0300, max=1.1612] +25-08-28 10:28:31 | D | - Smoothing Diffusion Block single_transformer_blocks.5 +25-08-28 10:28:31 | D | - Skipping Module single_transformer_blocks.5.norm.linear +25-08-28 10:28:31 | D | - Smoothing Transformer Block single_transformer_blocks.5 +25-08-28 10:28:31 | D | - single_transformer_blocks.5.attn.qkv_proj + single_transformer_blocks.5.up_proj +25-08-28 10:28:31 | D | + w: sint4 +25-08-28 10:28:31 | D | + x: sint4 +25-08-28 10:28:31 | D | + y: None +25-08-28 10:28:31 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:28:31 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:28:32 | D | + x - AbsMax +25-08-28 10:28:32 | D | + x = [min=0.1709, max=26.1250] +25-08-28 10:28:32 | D | + w - AbsMax +25-08-28 10:28:32 | D | + w = [min=0.0820, max=0.5352] +25-08-28 10:28:32 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:28:33 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:30:44 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:30:44 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:30:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:30:44 | D | - sum error = [29691.1666, 28944.1990, 28644.8409, 28045.9828, 27869.4511] +25-08-28 10:30:44 | D | - best error = [29691.1666, 28944.1990, 28644.8409, 28045.9828, 27869.4511] +25-08-28 10:30:44 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:30:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:30:44 | D | - sum error = [28640.3385, 28077.5327, 27789.8030, 27234.0481, 27437.2675] +25-08-28 10:30:44 | D | - best error = [27869.4511, 27869.4511, 27789.8030, 27234.0481, 27234.0481] +25-08-28 10:30:44 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:30:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:30:44 | D | - sum error = [26828.9372, 27187.7337, 27444.5664, 27508.6767, 27392.9205] +25-08-28 10:30:44 | D | - best error = [26828.9372, 26828.9372, 26828.9372, 26828.9372, 26828.9372] +25-08-28 10:30:44 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:30:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:30:44 | D | - sum error = [27755.9236, 27935.7503, 28292.9102, 28655.8861, 31221.7042] +25-08-28 10:30:44 | D | - best error = [26828.9372, 26828.9372, 26828.9372, 26828.9372, 26828.9372] +25-08-28 10:30:44 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:30:44 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:30:44 | D | - sum error = [34967.4354, 33366.4186, 32481.7655, 31931.8855, 31401.9546] +25-08-28 10:30:44 | D | - best error = [26828.9372, 26828.9372, 26828.9372, 26828.9372, 26828.9372] +25-08-28 10:30:44 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:30:44 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:30:44 | D | - sum error = [30341.8739, 29411.8659, 28775.9967, 28545.8609, 28546.0078] +25-08-28 10:30:44 | D | - best error = [26828.9372, 26828.9372, 26828.9372, 26828.9372, 26828.9372] +25-08-28 10:30:44 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:30:44 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:30:44 | D | - sum error = [28261.1626, 28401.8016, 28189.0589, 28129.2059, 29335.4448] +25-08-28 10:30:44 | D | - best error = [26828.9372, 26828.9372, 26828.9372, 26828.9372, 26828.9372] +25-08-28 10:30:44 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:30:44 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:30:44 | D | - sum error = [28875.6462, 28543.5168, 29536.0141, 30265.4824] +25-08-28 10:30:44 | D | - best error = [26828.9372, 26828.9372, 26828.9372, 26828.9372] +25-08-28 10:30:44 | D | + error = 26828.9372 +25-08-28 10:30:44 | D | + scale = [min=0.4134, max=5.1113] +25-08-28 10:30:45 | D | - single_transformer_blocks.5.attn.out_proj +25-08-28 10:30:45 | D | + w: sint4 +25-08-28 10:30:45 | D | + x: sint4 +25-08-28 10:30:45 | D | + y: None +25-08-28 10:30:45 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:30:45 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:30:45 | D | + x - AbsMax +25-08-28 10:30:45 | D | + x = [min=1.8125, max=14.3125] +25-08-28 10:30:45 | D | + w - AbsMax +25-08-28 10:30:45 | D | + w = [min=0.1196, max=0.3359] +25-08-28 10:30:45 | D | + finished resetting calibrator, ram usage: 18.3 +25-08-28 10:30:47 | D | + finished calculating the original outputs, ram usage: 18.3 +25-08-28 10:31:47 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:31:47 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:31:47 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:31:47 | D | - sum error = [ 6440.4137, 6427.5109, 6411.4304, 6411.0985, 6423.4645] +25-08-28 10:31:47 | D | - best error = [ 6440.4137, 6427.5109, 6411.4304, 6411.0985, 6411.0985] +25-08-28 10:31:47 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:31:47 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:31:47 | D | - sum error = [ 6425.7569, 6440.3140, 6462.1185, 6483.7298, 6518.6256] +25-08-28 10:31:47 | D | - best error = [ 6411.0985, 6411.0985, 6411.0985, 6411.0985, 6411.0985] +25-08-28 10:31:47 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:31:47 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:31:47 | D | - sum error = [ 6552.3281, 6593.0078, 6637.1699, 6709.1162, 6776.6520] +25-08-28 10:31:47 | D | - best error = [ 6411.0985, 6411.0985, 6411.0985, 6411.0985, 6411.0985] +25-08-28 10:31:47 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:31:47 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:31:47 | D | - sum error = [ 6838.0319, 6905.2351, 6989.2606, 7091.0634, 7174.8628] +25-08-28 10:31:47 | D | - best error = [ 6411.0985, 6411.0985, 6411.0985, 6411.0985, 6411.0985] +25-08-28 10:31:47 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:31:47 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:31:47 | D | - sum error = [ 6754.4806, 6708.6574, 6675.9074, 6645.0577, 6617.7984] +25-08-28 10:31:47 | D | - best error = [ 6411.0985, 6411.0985, 6411.0985, 6411.0985, 6411.0985] +25-08-28 10:31:47 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:31:47 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:31:47 | D | - sum error = [ 6605.3554, 6607.6377, 6609.4242, 6617.1402, 6639.2256] +25-08-28 10:31:47 | D | - best error = [ 6411.0985, 6411.0985, 6411.0985, 6411.0985, 6411.0985] +25-08-28 10:31:47 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:31:47 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:31:47 | D | - sum error = [ 6667.0563, 6692.6868, 6741.2634, 6796.2693, 6851.0244] +25-08-28 10:31:47 | D | - best error = [ 6411.0985, 6411.0985, 6411.0985, 6411.0985, 6411.0985] +25-08-28 10:31:47 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:31:47 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:31:47 | D | - sum error = [ 6917.8650, 6994.7151, 7092.6210, 7173.5974] +25-08-28 10:31:47 | D | - best error = [ 6411.0985, 6411.0985, 6411.0985, 6411.0985] +25-08-28 10:31:47 | D | + error = 6411.0985 +25-08-28 10:31:47 | D | + scale = [min=1.0933, max=1.4906] +25-08-28 10:31:47 | D | - single_transformer_blocks.5.down_proj +25-08-28 10:31:47 | D | + w: sint4 +25-08-28 10:31:47 | D | + x: uint4 +25-08-28 10:31:47 | D | + y: None +25-08-28 10:31:47 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:31:47 | D | + finished parsing calibration arguments, ram usage: 18.3 +25-08-28 10:31:50 | D | + x - AbsMax +25-08-28 10:31:50 | D | + x = [min=1.8984, max=20.8750] +25-08-28 10:31:50 | D | + w - AbsMax +25-08-28 10:31:50 | D | + w = [min=0.1133, max=0.7773] +25-08-28 10:31:50 | D | + finished resetting calibrator, ram usage: 18.3 +25-08-28 10:31:52 | D | + finished calculating the original outputs, ram usage: 18.3 +25-08-28 10:33:54 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:33:54 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:33:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:33:54 | D | - sum error = [ 8445.4669, 8463.0987, 8513.8912, 8581.4031, 8622.0536] +25-08-28 10:33:54 | D | - best error = [ 8445.4669, 8445.4669, 8445.4669, 8445.4669, 8445.4669] +25-08-28 10:33:54 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:33:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:33:55 | D | - sum error = [ 8691.4749, 8803.0863, 8899.6719, 9009.3782, 9171.3406] +25-08-28 10:33:55 | D | - best error = [ 8445.4669, 8445.4669, 8445.4669, 8445.4669, 8445.4669] +25-08-28 10:33:55 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:33:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:33:55 | D | - sum error = [ 9321.7608, 9493.7748, 9623.5968, 9827.7851, 10055.0123] +25-08-28 10:33:55 | D | - best error = [ 8445.4669, 8445.4669, 8445.4669, 8445.4669, 8445.4669] +25-08-28 10:33:55 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:33:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:33:55 | D | - sum error = [10252.5521, 10506.3333, 10743.9676, 11026.4457, 11348.5830] +25-08-28 10:33:55 | D | - best error = [ 8445.4669, 8445.4669, 8445.4669, 8445.4669, 8445.4669] +25-08-28 10:33:55 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:33:55 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:33:55 | D | - sum error = [ 8860.4465, 8746.1470, 8661.7938, 8554.3640, 8505.4726] +25-08-28 10:33:55 | D | - best error = [ 8445.4669, 8445.4669, 8445.4669, 8445.4669, 8445.4669] +25-08-28 10:33:55 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:33:55 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:33:55 | D | - sum error = [ 8465.4460, 8447.0539, 8525.0206, 8570.5374, 8683.2482] +25-08-28 10:33:55 | D | - best error = [ 8445.4669, 8445.4669, 8445.4669, 8445.4669, 8445.4669] +25-08-28 10:33:55 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:33:55 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:33:55 | D | - sum error = [ 8819.7206, 8981.8123, 9226.8226, 9462.5768, 9702.2558] +25-08-28 10:33:55 | D | - best error = [ 8445.4669, 8445.4669, 8445.4669, 8445.4669, 8445.4669] +25-08-28 10:33:55 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:33:55 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:33:55 | D | - sum error = [10038.5095, 10357.7739, 10733.5023, 11173.5309] +25-08-28 10:33:55 | D | - best error = [ 8445.4669, 8445.4669, 8445.4669, 8445.4669] +25-08-28 10:33:55 | D | + error = 8445.4669 +25-08-28 10:33:55 | D | + scale = [min=1.0000, max=1.0000] +25-08-28 10:34:15 | D | - Smoothing Diffusion Block single_transformer_blocks.6 +25-08-28 10:34:15 | D | - Skipping Module single_transformer_blocks.6.norm.linear +25-08-28 10:34:15 | D | - Smoothing Transformer Block single_transformer_blocks.6 +25-08-28 10:34:15 | D | - single_transformer_blocks.6.attn.qkv_proj + single_transformer_blocks.6.up_proj +25-08-28 10:34:15 | D | + w: sint4 +25-08-28 10:34:15 | D | + x: sint4 +25-08-28 10:34:15 | D | + y: None +25-08-28 10:34:15 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:34:15 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:34:16 | D | + x - AbsMax +25-08-28 10:34:16 | D | + x = [min=0.1621, max=23.1250] +25-08-28 10:34:16 | D | + w - AbsMax +25-08-28 10:34:16 | D | + w = [min=0.0869, max=0.6016] +25-08-28 10:34:16 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:34:18 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:36:24 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:36:24 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:36:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:36:24 | D | - sum error = [36745.0093, 36160.6248, 35082.3306, 34936.5157, 34701.5730] +25-08-28 10:36:24 | D | - best error = [36745.0093, 36160.6248, 35082.3306, 34936.5157, 34701.5730] +25-08-28 10:36:24 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:36:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:36:24 | D | - sum error = [34290.9938, 32872.2930, 32330.5903, 31911.8125, 31350.1587] +25-08-28 10:36:24 | D | - best error = [34290.9938, 32872.2930, 32330.5903, 31911.8125, 31350.1587] +25-08-28 10:36:24 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:36:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:36:24 | D | - sum error = [31158.1586, 31198.4902, 30731.3489, 30996.1505, 31085.9680] +25-08-28 10:36:24 | D | - best error = [31158.1586, 31158.1586, 30731.3489, 30731.3489, 30731.3489] +25-08-28 10:36:24 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:36:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:36:24 | D | - sum error = [31214.6745, 31497.9557, 31714.9888, 32068.6713, 33003.5925] +25-08-28 10:36:24 | D | - best error = [30731.3489, 30731.3489, 30731.3489, 30731.3489, 30731.3489] +25-08-28 10:36:24 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:36:24 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:36:24 | D | - sum error = [49368.6761, 47054.4876, 44615.2534, 42999.9144, 42000.1676] +25-08-28 10:36:24 | D | - best error = [30731.3489, 30731.3489, 30731.3489, 30731.3489, 30731.3489] +25-08-28 10:36:24 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:36:24 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:36:24 | D | - sum error = [39470.8228, 38246.4279, 36943.6691, 36279.4575, 35235.9852] +25-08-28 10:36:24 | D | - best error = [30731.3489, 30731.3489, 30731.3489, 30731.3489, 30731.3489] +25-08-28 10:36:24 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:36:24 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:36:24 | D | - sum error = [34477.2305, 33728.4896, 32902.1278, 32974.6142, 32701.9295] +25-08-28 10:36:24 | D | - best error = [30731.3489, 30731.3489, 30731.3489, 30731.3489, 30731.3489] +25-08-28 10:36:24 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:36:24 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:36:24 | D | - sum error = [32449.3254, 32446.3984, 32701.1104, 33405.5382] +25-08-28 10:36:24 | D | - best error = [30731.3489, 30731.3489, 30731.3489, 30731.3489] +25-08-28 10:36:24 | D | + error = 30731.3489 +25-08-28 10:36:24 | D | + scale = [min=0.3356, max=6.5834] +25-08-28 10:36:24 | D | - single_transformer_blocks.6.attn.out_proj +25-08-28 10:36:24 | D | + w: sint4 +25-08-28 10:36:24 | D | + x: sint4 +25-08-28 10:36:24 | D | + y: None +25-08-28 10:36:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:36:24 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:36:24 | D | + x - AbsMax +25-08-28 10:36:24 | D | + x = [min=1.9297, max=13.4375] +25-08-28 10:36:24 | D | + w - AbsMax +25-08-28 10:36:24 | D | + w = [min=0.1167, max=0.2969] +25-08-28 10:36:24 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:36:25 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:37:22 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:37:22 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:37:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:37:22 | D | - sum error = [ 6769.0224, 6747.6301, 6725.3479, 6717.7580, 6709.2694] +25-08-28 10:37:22 | D | - best error = [ 6769.0224, 6747.6301, 6725.3479, 6717.7580, 6709.2694] +25-08-28 10:37:22 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:37:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:37:22 | D | - sum error = [ 6708.3733, 6711.2383, 6724.0140, 6730.8499, 6753.8993] +25-08-28 10:37:22 | D | - best error = [ 6708.3733, 6708.3733, 6708.3733, 6708.3733, 6708.3733] +25-08-28 10:37:22 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:37:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:37:22 | D | - sum error = [ 6776.8237, 6811.2419, 6854.1056, 6910.6324, 6952.9033] +25-08-28 10:37:22 | D | - best error = [ 6708.3733, 6708.3733, 6708.3733, 6708.3733, 6708.3733] +25-08-28 10:37:22 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:37:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:37:22 | D | - sum error = [ 7001.6032, 7066.0427, 7128.9604, 7217.6783, 7284.4503] +25-08-28 10:37:22 | D | - best error = [ 6708.3733, 6708.3733, 6708.3733, 6708.3733, 6708.3733] +25-08-28 10:37:22 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:37:22 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:37:22 | D | - sum error = [ 7008.5685, 6961.5829, 6919.1728, 6895.3236, 6864.4379] +25-08-28 10:37:22 | D | - best error = [ 6708.3733, 6708.3733, 6708.3733, 6708.3733, 6708.3733] +25-08-28 10:37:22 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:37:22 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:37:22 | D | - sum error = [ 6849.3655, 6841.0689, 6836.0360, 6843.7648, 6860.4924] +25-08-28 10:37:22 | D | - best error = [ 6708.3733, 6708.3733, 6708.3733, 6708.3733, 6708.3733] +25-08-28 10:37:22 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:37:22 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:37:22 | D | - sum error = [ 6878.7124, 6893.2495, 6925.2041, 6962.2252, 7025.7552] +25-08-28 10:37:22 | D | - best error = [ 6708.3733, 6708.3733, 6708.3733, 6708.3733, 6708.3733] +25-08-28 10:37:22 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:37:22 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:37:22 | D | - sum error = [ 7066.6564, 7128.5524, 7211.5918, 7281.1452] +25-08-28 10:37:22 | D | - best error = [ 6708.3733, 6708.3733, 6708.3733, 6708.3733] +25-08-28 10:37:22 | D | + error = 6708.3733 +25-08-28 10:37:22 | D | + scale = [min=1.1786, max=1.9146] +25-08-28 10:37:23 | D | - single_transformer_blocks.6.down_proj +25-08-28 10:37:23 | D | + w: sint4 +25-08-28 10:37:23 | D | + x: uint4 +25-08-28 10:37:23 | D | + y: None +25-08-28 10:37:23 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:37:23 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:37:26 | D | + x - AbsMax +25-08-28 10:37:26 | D | + x = [min=1.0391, max=18.1250] +25-08-28 10:37:26 | D | + w - AbsMax +25-08-28 10:37:26 | D | + w = [min=0.1177, max=1.2578] +25-08-28 10:37:26 | D | + finished resetting calibrator, ram usage: 18.3 +25-08-28 10:37:28 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:39:24 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:39:24 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:39:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:39:24 | D | - sum error = [ 8567.6989, 8564.0336, 8571.4503, 8630.9837, 8692.2154] +25-08-28 10:39:24 | D | - best error = [ 8567.6989, 8564.0336, 8564.0336, 8564.0336, 8564.0336] +25-08-28 10:39:24 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:39:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:39:24 | D | - sum error = [ 8774.9006, 8807.3933, 8928.4870, 9073.7935, 9222.9735] +25-08-28 10:39:24 | D | - best error = [ 8564.0336, 8564.0336, 8564.0336, 8564.0336, 8564.0336] +25-08-28 10:39:24 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:39:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:39:24 | D | - sum error = [ 9283.2486, 9446.0341, 9629.1985, 9828.0916, 10024.9828] +25-08-28 10:39:24 | D | - best error = [ 8564.0336, 8564.0336, 8564.0336, 8564.0336, 8564.0336] +25-08-28 10:39:24 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:39:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:39:24 | D | - sum error = [10254.0357, 10457.2218, 10741.0268, 10920.9296, 11186.0359] +25-08-28 10:39:24 | D | - best error = [ 8564.0336, 8564.0336, 8564.0336, 8564.0336, 8564.0336] +25-08-28 10:39:24 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:39:24 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:39:24 | D | - sum error = [ 9214.8150, 9019.3889, 8879.0261, 8812.4076, 8711.7902] +25-08-28 10:39:24 | D | - best error = [ 8564.0336, 8564.0336, 8564.0336, 8564.0336, 8564.0336] +25-08-28 10:39:24 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:39:24 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:39:24 | D | - sum error = [ 8646.9444, 8656.0342, 8666.5112, 8701.6762, 8791.2452] +25-08-28 10:39:24 | D | - best error = [ 8564.0336, 8564.0336, 8564.0336, 8564.0336, 8564.0336] +25-08-28 10:39:24 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:39:24 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:39:24 | D | - sum error = [ 8913.1839, 9096.5637, 9262.1436, 9444.3058, 9691.9418] +25-08-28 10:39:24 | D | - best error = [ 8564.0336, 8564.0336, 8564.0336, 8564.0336, 8564.0336] +25-08-28 10:39:24 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:39:24 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:39:24 | D | - sum error = [10050.8778, 10322.1261, 10677.1895, 11098.2752] +25-08-28 10:39:24 | D | - best error = [ 8564.0336, 8564.0336, 8564.0336, 8564.0336] +25-08-28 10:39:24 | D | + error = 8564.0336 +25-08-28 10:39:24 | D | + scale = [min=1.0019, max=1.1559] +25-08-28 10:39:44 | D | - Smoothing Diffusion Block single_transformer_blocks.7 +25-08-28 10:39:44 | D | - Skipping Module single_transformer_blocks.7.norm.linear +25-08-28 10:39:44 | D | - Smoothing Transformer Block single_transformer_blocks.7 +25-08-28 10:39:44 | D | - single_transformer_blocks.7.attn.qkv_proj + single_transformer_blocks.7.up_proj +25-08-28 10:39:44 | D | + w: sint4 +25-08-28 10:39:44 | D | + x: sint4 +25-08-28 10:39:44 | D | + y: None +25-08-28 10:39:44 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:39:44 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:39:44 | D | + x - AbsMax +25-08-28 10:39:44 | D | + x = [min=0.1465, max=22.5000] +25-08-28 10:39:44 | D | + w - AbsMax +25-08-28 10:39:44 | D | + w = [min=0.0942, max=0.5703] +25-08-28 10:39:44 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:39:45 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:41:55 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:41:55 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:41:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:41:55 | D | - sum error = [40068.7282, 39585.4891, 38017.5118, 37372.5615, 37228.5379] +25-08-28 10:41:55 | D | - best error = [40068.7282, 39585.4891, 38017.5118, 37372.5615, 37228.5379] +25-08-28 10:41:55 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:41:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:41:55 | D | - sum error = [36323.1576, 35347.9201, 34970.1331, 34972.1572, 35483.9276] +25-08-28 10:41:55 | D | - best error = [36323.1576, 35347.9201, 34970.1331, 34970.1331, 34970.1331] +25-08-28 10:41:55 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:41:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:41:55 | D | - sum error = [33683.3291, 34840.9139, 33653.4280, 33190.4565, 33300.0747] +25-08-28 10:41:55 | D | - best error = [33683.3291, 33683.3291, 33653.4280, 33190.4565, 33190.4565] +25-08-28 10:41:55 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:41:55 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:41:55 | D | - sum error = [34347.1892, 34673.4601, 34942.1611, 34819.1818, 35440.9535] +25-08-28 10:41:55 | D | - best error = [33190.4565, 33190.4565, 33190.4565, 33190.4565, 33190.4565] +25-08-28 10:41:55 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:41:55 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:41:55 | D | - sum error = [48676.6336, 47137.9796, 45816.5441, 43373.9166, 42247.4054] +25-08-28 10:41:55 | D | - best error = [33190.4565, 33190.4565, 33190.4565, 33190.4565, 33190.4565] +25-08-28 10:41:55 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:41:55 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:41:55 | D | - sum error = [41174.9609, 39526.3898, 38224.7970, 37258.2048, 36309.4205] +25-08-28 10:41:55 | D | - best error = [33190.4565, 33190.4565, 33190.4565, 33190.4565, 33190.4565] +25-08-28 10:41:55 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:41:55 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:41:55 | D | - sum error = [35609.8467, 35728.5249, 35305.0251, 35477.2463, 35043.9176] +25-08-28 10:41:55 | D | - best error = [33190.4565, 33190.4565, 33190.4565, 33190.4565, 33190.4565] +25-08-28 10:41:55 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:41:55 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:41:55 | D | - sum error = [34674.8665, 34853.3201, 34600.0009, 35585.7715] +25-08-28 10:41:55 | D | - best error = [33190.4565, 33190.4565, 33190.4565, 33190.4565] +25-08-28 10:41:55 | D | + error = 33190.4565 +25-08-28 10:41:55 | D | + scale = [min=0.2869, max=7.5669] +25-08-28 10:41:55 | D | - single_transformer_blocks.7.attn.out_proj +25-08-28 10:41:55 | D | + w: sint4 +25-08-28 10:41:55 | D | + x: sint4 +25-08-28 10:41:55 | D | + y: None +25-08-28 10:41:55 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:41:55 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:41:56 | D | + x - AbsMax +25-08-28 10:41:56 | D | + x = [min=1.6641, max=8.8125] +25-08-28 10:41:56 | D | + w - AbsMax +25-08-28 10:41:56 | D | + w = [min=0.1147, max=0.3379] +25-08-28 10:41:56 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:41:57 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:42:56 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:42:56 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:42:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:42:56 | D | - sum error = [ 6947.3402, 6924.7989, 6913.0248, 6890.8554, 6874.2668] +25-08-28 10:42:56 | D | - best error = [ 6947.3402, 6924.7989, 6913.0248, 6890.8554, 6874.2668] +25-08-28 10:42:56 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:42:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:42:56 | D | - sum error = [ 6868.7673, 6866.0626, 6868.5472, 6876.5835, 6879.8483] +25-08-28 10:42:56 | D | - best error = [ 6868.7673, 6866.0626, 6866.0626, 6866.0626, 6866.0626] +25-08-28 10:42:56 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:42:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:42:56 | D | - sum error = [ 6898.3105, 6919.6257, 6943.4300, 6968.9922, 6994.4135] +25-08-28 10:42:56 | D | - best error = [ 6866.0626, 6866.0626, 6866.0626, 6866.0626, 6866.0626] +25-08-28 10:42:56 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:42:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:42:56 | D | - sum error = [ 7032.4491, 7066.3595, 7111.2280, 7158.1196, 7209.9165] +25-08-28 10:42:56 | D | - best error = [ 6866.0626, 6866.0626, 6866.0626, 6866.0626, 6866.0626] +25-08-28 10:42:56 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:42:56 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:42:56 | D | - sum error = [ 7336.6431, 7273.3063, 7213.6601, 7176.4582, 7126.2029] +25-08-28 10:42:56 | D | - best error = [ 6866.0626, 6866.0626, 6866.0626, 6866.0626, 6866.0626] +25-08-28 10:42:56 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:42:56 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:42:56 | D | - sum error = [ 7097.6937, 7066.6620, 7037.1793, 7015.7292, 7016.5470] +25-08-28 10:42:56 | D | - best error = [ 6866.0626, 6866.0626, 6866.0626, 6866.0626, 6866.0626] +25-08-28 10:42:56 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:42:56 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:42:56 | D | - sum error = [ 7010.1215, 7015.1396, 7030.6005, 7040.5824, 7060.3110] +25-08-28 10:42:56 | D | - best error = [ 6866.0626, 6866.0626, 6866.0626, 6866.0626, 6866.0626] +25-08-28 10:42:56 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:42:56 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:42:56 | D | - sum error = [ 7078.4216, 7120.3531, 7160.1765, 7213.6293] +25-08-28 10:42:56 | D | - best error = [ 6866.0626, 6866.0626, 6866.0626, 6866.0626] +25-08-28 10:42:56 | D | + error = 6866.0626 +25-08-28 10:42:56 | D | + scale = [min=1.1651, max=1.9210] +25-08-28 10:42:56 | D | - single_transformer_blocks.7.down_proj +25-08-28 10:42:56 | D | + w: sint4 +25-08-28 10:42:56 | D | + x: uint4 +25-08-28 10:42:56 | D | + y: None +25-08-28 10:42:56 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:42:56 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:42:59 | D | + x - AbsMax +25-08-28 10:42:59 | D | + x = [min=1.4922, max=23.1250] +25-08-28 10:42:59 | D | + w - AbsMax +25-08-28 10:42:59 | D | + w = [min=0.1147, max=0.7500] +25-08-28 10:42:59 | D | + finished resetting calibrator, ram usage: 18.3 +25-08-28 10:43:02 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:45:06 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:45:06 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:45:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:45:06 | D | - sum error = [ 8671.2827, 8674.5015, 8714.0521, 8770.5430, 8841.8107] +25-08-28 10:45:06 | D | - best error = [ 8671.2827, 8671.2827, 8671.2827, 8671.2827, 8671.2827] +25-08-28 10:45:06 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:45:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:45:06 | D | - sum error = [ 8905.2385, 8973.9802, 9084.0000, 9189.8457, 9270.3670] +25-08-28 10:45:06 | D | - best error = [ 8671.2827, 8671.2827, 8671.2827, 8671.2827, 8671.2827] +25-08-28 10:45:06 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:45:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:45:06 | D | - sum error = [ 9394.7781, 9564.6244, 9734.3951, 9962.4507, 10181.7417] +25-08-28 10:45:06 | D | - best error = [ 8671.2827, 8671.2827, 8671.2827, 8671.2827, 8671.2827] +25-08-28 10:45:06 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:45:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:45:06 | D | - sum error = [10404.2345, 10637.8539, 10947.2298, 11199.3703, 11481.4877] +25-08-28 10:45:06 | D | - best error = [ 8671.2827, 8671.2827, 8671.2827, 8671.2827, 8671.2827] +25-08-28 10:45:06 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:45:06 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:45:06 | D | - sum error = [ 9176.7746, 9028.2198, 8947.0044, 8868.5891, 8818.8126] +25-08-28 10:45:06 | D | - best error = [ 8671.2827, 8671.2827, 8671.2827, 8671.2827, 8671.2827] +25-08-28 10:45:06 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:45:06 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:45:06 | D | - sum error = [ 8823.7620, 8826.8971, 8868.1412, 8923.9874, 8980.0940] +25-08-28 10:45:06 | D | - best error = [ 8671.2827, 8671.2827, 8671.2827, 8671.2827, 8671.2827] +25-08-28 10:45:06 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:45:06 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:45:06 | D | - sum error = [ 9122.9223, 9264.9533, 9461.7263, 9688.6882, 9984.1875] +25-08-28 10:45:06 | D | - best error = [ 8671.2827, 8671.2827, 8671.2827, 8671.2827, 8671.2827] +25-08-28 10:45:06 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:45:06 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:45:06 | D | - sum error = [10242.0713, 10557.3792, 10991.7427, 11365.2671] +25-08-28 10:45:06 | D | - best error = [ 8671.2827, 8671.2827, 8671.2827, 8671.2827] +25-08-28 10:45:06 | D | + error = 8671.2827 +25-08-28 10:45:06 | D | + scale = [min=1.0000, max=1.0000] +25-08-28 10:45:27 | D | - Smoothing Diffusion Block single_transformer_blocks.8 +25-08-28 10:45:27 | D | - Skipping Module single_transformer_blocks.8.norm.linear +25-08-28 10:45:27 | D | - Smoothing Transformer Block single_transformer_blocks.8 +25-08-28 10:45:27 | D | - single_transformer_blocks.8.attn.qkv_proj + single_transformer_blocks.8.up_proj +25-08-28 10:45:27 | D | + w: sint4 +25-08-28 10:45:27 | D | + x: sint4 +25-08-28 10:45:27 | D | + y: None +25-08-28 10:45:27 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:45:27 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:45:27 | D | + x - AbsMax +25-08-28 10:45:27 | D | + x = [min=0.1484, max=18.2500] +25-08-28 10:45:27 | D | + w - AbsMax +25-08-28 10:45:27 | D | + w = [min=0.0786, max=0.6562] +25-08-28 10:45:27 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:45:28 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:47:36 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:47:36 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:47:36 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:47:36 | D | - sum error = [42185.2312, 41713.0326, 40240.9320, 39297.3420, 38615.2053] +25-08-28 10:47:36 | D | - best error = [42185.2312, 41713.0326, 40240.9320, 39297.3420, 38615.2053] +25-08-28 10:47:36 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:47:36 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:47:36 | D | - sum error = [38566.7231, 37979.4336, 37412.0118, 37569.7921, 36713.2020] +25-08-28 10:47:36 | D | - best error = [38566.7231, 37979.4336, 37412.0118, 37412.0118, 36713.2020] +25-08-28 10:47:36 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:47:36 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:47:36 | D | - sum error = [36675.8427, 36125.4667, 35947.5511, 35953.6858, 35950.8749] +25-08-28 10:47:36 | D | - best error = [36675.8427, 36125.4667, 35947.5511, 35947.5511, 35947.5511] +25-08-28 10:47:36 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:47:36 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:47:36 | D | - sum error = [36147.0950, 36263.8950, 36673.4642, 37093.1420, 37586.5678] +25-08-28 10:47:36 | D | - best error = [35947.5511, 35947.5511, 35947.5511, 35947.5511, 35947.5511] +25-08-28 10:47:36 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:47:36 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:47:36 | D | - sum error = [54748.5656, 52334.9067, 49989.0318, 48262.7396, 47282.9806] +25-08-28 10:47:36 | D | - best error = [35947.5511, 35947.5511, 35947.5511, 35947.5511, 35947.5511] +25-08-28 10:47:36 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:47:36 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:47:36 | D | - sum error = [45901.8515, 43827.1996, 42336.7090, 41672.9053, 40046.0759] +25-08-28 10:47:36 | D | - best error = [35947.5511, 35947.5511, 35947.5511, 35947.5511, 35947.5511] +25-08-28 10:47:36 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:47:36 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:47:36 | D | - sum error = [39470.5488, 38628.1622, 38045.9714, 37674.8532, 37409.1035] +25-08-28 10:47:36 | D | - best error = [35947.5511, 35947.5511, 35947.5511, 35947.5511, 35947.5511] +25-08-28 10:47:36 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:47:36 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:47:36 | D | - sum error = [37500.9470, 37645.2936, 37478.0217, 37781.0484] +25-08-28 10:47:36 | D | - best error = [35947.5511, 35947.5511, 35947.5511, 35947.5511] +25-08-28 10:47:36 | D | + error = 35947.5511 +25-08-28 10:47:36 | D | + scale = [min=0.3184, max=5.7116] +25-08-28 10:47:36 | D | - single_transformer_blocks.8.attn.out_proj +25-08-28 10:47:36 | D | + w: sint4 +25-08-28 10:47:36 | D | + x: sint4 +25-08-28 10:47:36 | D | + y: None +25-08-28 10:47:36 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:47:36 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:47:37 | D | + x - AbsMax +25-08-28 10:47:37 | D | + x = [min=1.7422, max=9.1250] +25-08-28 10:47:37 | D | + w - AbsMax +25-08-28 10:47:37 | D | + w = [min=0.1211, max=0.3164] +25-08-28 10:47:37 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:47:38 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:48:39 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:48:39 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:48:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:48:39 | D | - sum error = [ 6087.7477, 6082.0875, 6068.8912, 6069.2566, 6073.3236] +25-08-28 10:48:39 | D | - best error = [ 6087.7477, 6082.0875, 6068.8912, 6068.8912, 6068.8912] +25-08-28 10:48:39 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:48:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:48:39 | D | - sum error = [ 6075.4111, 6081.3489, 6101.3716, 6119.1075, 6137.1928] +25-08-28 10:48:39 | D | - best error = [ 6068.8912, 6068.8912, 6068.8912, 6068.8912, 6068.8912] +25-08-28 10:48:39 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:48:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:48:39 | D | - sum error = [ 6156.0828, 6177.0773, 6204.6769, 6236.3486, 6272.1145] +25-08-28 10:48:39 | D | - best error = [ 6068.8912, 6068.8912, 6068.8912, 6068.8912, 6068.8912] +25-08-28 10:48:39 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:48:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:48:39 | D | - sum error = [ 6312.5279, 6360.0814, 6419.4218, 6472.5204, 6536.3748] +25-08-28 10:48:39 | D | - best error = [ 6068.8912, 6068.8912, 6068.8912, 6068.8912, 6068.8912] +25-08-28 10:48:39 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:48:39 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:48:39 | D | - sum error = [ 6335.3005, 6307.4372, 6271.5673, 6237.3235, 6224.3313] +25-08-28 10:48:39 | D | - best error = [ 6068.8912, 6068.8912, 6068.8912, 6068.8912, 6068.8912] +25-08-28 10:48:39 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:48:39 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:48:39 | D | - sum error = [ 6212.3084, 6191.4276, 6197.0428, 6198.0752, 6204.9535] +25-08-28 10:48:39 | D | - best error = [ 6068.8912, 6068.8912, 6068.8912, 6068.8912, 6068.8912] +25-08-28 10:48:39 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:48:39 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:48:39 | D | - sum error = [ 6223.8021, 6248.5077, 6275.1535, 6298.2417, 6342.9807] +25-08-28 10:48:39 | D | - best error = [ 6068.8912, 6068.8912, 6068.8912, 6068.8912, 6068.8912] +25-08-28 10:48:39 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:48:39 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:48:39 | D | - sum error = [ 6379.9730, 6428.1375, 6475.6597, 6523.9825] +25-08-28 10:48:39 | D | - best error = [ 6068.8912, 6068.8912, 6068.8912, 6068.8912] +25-08-28 10:48:39 | D | + error = 6068.8912 +25-08-28 10:48:39 | D | + scale = [min=1.0571, max=1.2475] +25-08-28 10:48:39 | D | - single_transformer_blocks.8.down_proj +25-08-28 10:48:39 | D | + w: sint4 +25-08-28 10:48:39 | D | + x: uint4 +25-08-28 10:48:39 | D | + y: None +25-08-28 10:48:39 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:48:39 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:48:41 | D | + x - AbsMax +25-08-28 10:48:41 | D | + x = [min=1.7812, max=13.8750] +25-08-28 10:48:41 | D | + w - AbsMax +25-08-28 10:48:41 | D | + w = [min=0.1143, max=1.5547] +25-08-28 10:48:41 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:48:43 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:50:43 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:50:43 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:50:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:50:43 | D | - sum error = [ 8219.0087, 8239.9675, 8252.6262, 8268.8282, 8324.1297] +25-08-28 10:50:43 | D | - best error = [ 8219.0087, 8219.0087, 8219.0087, 8219.0087, 8219.0087] +25-08-28 10:50:43 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:50:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:50:43 | D | - sum error = [ 8394.2857, 8442.4759, 8497.0451, 8601.7645, 8718.6838] +25-08-28 10:50:43 | D | - best error = [ 8219.0087, 8219.0087, 8219.0087, 8219.0087, 8219.0087] +25-08-28 10:50:43 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:50:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:50:43 | D | - sum error = [ 8872.6082, 8940.1262, 9057.7202, 9219.3557, 9380.7556] +25-08-28 10:50:43 | D | - best error = [ 8219.0087, 8219.0087, 8219.0087, 8219.0087, 8219.0087] +25-08-28 10:50:43 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:50:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:50:43 | D | - sum error = [ 9587.5640, 9762.7963, 9968.7688, 10186.4805, 10392.5639] +25-08-28 10:50:43 | D | - best error = [ 8219.0087, 8219.0087, 8219.0087, 8219.0087, 8219.0087] +25-08-28 10:50:43 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:50:43 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:50:43 | D | - sum error = [ 9127.5740, 8935.9517, 8779.1717, 8671.6651, 8532.6634] +25-08-28 10:50:43 | D | - best error = [ 8219.0087, 8219.0087, 8219.0087, 8219.0087, 8219.0087] +25-08-28 10:50:43 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:50:43 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:50:43 | D | - sum error = [ 8456.2914, 8457.6068, 8442.9275, 8475.7829, 8517.3019] +25-08-28 10:50:43 | D | - best error = [ 8219.0087, 8219.0087, 8219.0087, 8219.0087, 8219.0087] +25-08-28 10:50:43 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:50:43 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:50:43 | D | - sum error = [ 8581.1440, 8671.2255, 8818.1957, 9038.2737, 9228.1951] +25-08-28 10:50:43 | D | - best error = [ 8219.0087, 8219.0087, 8219.0087, 8219.0087, 8219.0087] +25-08-28 10:50:43 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:50:43 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:50:43 | D | - sum error = [ 9435.7762, 9703.5849, 10019.1752, 10255.2098] +25-08-28 10:50:43 | D | - best error = [ 8219.0087, 8219.0087, 8219.0087, 8219.0087] +25-08-28 10:50:43 | D | + error = 8219.0087 +25-08-28 10:50:43 | D | + scale = [min=1.0000, max=1.0000] +25-08-28 10:51:04 | D | - Smoothing Diffusion Block single_transformer_blocks.9 +25-08-28 10:51:04 | D | - Skipping Module single_transformer_blocks.9.norm.linear +25-08-28 10:51:04 | D | - Smoothing Transformer Block single_transformer_blocks.9 +25-08-28 10:51:04 | D | - single_transformer_blocks.9.attn.qkv_proj + single_transformer_blocks.9.up_proj +25-08-28 10:51:04 | D | + w: sint4 +25-08-28 10:51:04 | D | + x: sint4 +25-08-28 10:51:04 | D | + y: None +25-08-28 10:51:04 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:51:04 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:51:04 | D | + x - AbsMax +25-08-28 10:51:04 | D | + x = [min=0.1709, max=19.8750] +25-08-28 10:51:04 | D | + w - AbsMax +25-08-28 10:51:04 | D | + w = [min=0.0752, max=0.8906] +25-08-28 10:51:04 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:51:06 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:53:11 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:53:11 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:53:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:53:11 | D | - sum error = [47467.2219, 46035.0781, 44917.4617, 43936.5259, 42868.9389] +25-08-28 10:53:11 | D | - best error = [47467.2219, 46035.0781, 44917.4617, 43936.5259, 42868.9389] +25-08-28 10:53:11 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:53:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:53:11 | D | - sum error = [42031.9284, 41432.8040, 40875.7884, 40431.2719, 40102.7895] +25-08-28 10:53:11 | D | - best error = [42031.9284, 41432.8040, 40875.7884, 40431.2719, 40102.7895] +25-08-28 10:53:11 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:53:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:53:11 | D | - sum error = [40019.4129, 39730.2923, 39655.4876, 39845.9514, 39815.6954] +25-08-28 10:53:11 | D | - best error = [40019.4129, 39730.2923, 39655.4876, 39655.4876, 39655.4876] +25-08-28 10:53:11 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:53:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:53:11 | D | - sum error = [40072.3346, 40405.6107, 40962.3656, 41146.5968, 41923.7510] +25-08-28 10:53:11 | D | - best error = [39655.4876, 39655.4876, 39655.4876, 39655.4876, 39655.4876] +25-08-28 10:53:11 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:53:11 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:53:11 | D | - sum error = [56905.5110, 54418.7756, 52725.0110, 50569.7611, 48877.4677] +25-08-28 10:53:11 | D | - best error = [39655.4876, 39655.4876, 39655.4876, 39655.4876, 39655.4876] +25-08-28 10:53:11 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:53:11 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:53:11 | D | - sum error = [47358.5162, 46140.7738, 44945.9565, 44042.2357, 43039.9970] +25-08-28 10:53:11 | D | - best error = [39655.4876, 39655.4876, 39655.4876, 39655.4876, 39655.4876] +25-08-28 10:53:11 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:53:11 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:53:11 | D | - sum error = [42330.8084, 41885.1901, 41470.6018, 41066.5583, 41018.9810] +25-08-28 10:53:11 | D | - best error = [39655.4876, 39655.4876, 39655.4876, 39655.4876, 39655.4876] +25-08-28 10:53:11 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:53:11 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:53:11 | D | - sum error = [41213.8944, 41378.4083, 41448.5443, 42044.9223] +25-08-28 10:53:11 | D | - best error = [39655.4876, 39655.4876, 39655.4876, 39655.4876] +25-08-28 10:53:11 | D | + error = 39655.4876 +25-08-28 10:53:11 | D | + scale = [min=0.3465, max=6.0115] +25-08-28 10:53:11 | D | - single_transformer_blocks.9.attn.out_proj +25-08-28 10:53:11 | D | + w: sint4 +25-08-28 10:53:11 | D | + x: sint4 +25-08-28 10:53:11 | D | + y: None +25-08-28 10:53:11 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:53:11 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:53:12 | D | + x - AbsMax +25-08-28 10:53:12 | D | + x = [min=1.9453, max=10.2500] +25-08-28 10:53:12 | D | + w - AbsMax +25-08-28 10:53:12 | D | + w = [min=0.1226, max=0.4766] +25-08-28 10:53:12 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:53:13 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:54:12 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:54:12 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:54:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:54:12 | D | - sum error = [ 7003.0002, 6985.2095, 6965.3749, 6949.7891, 6948.8330] +25-08-28 10:54:12 | D | - best error = [ 7003.0002, 6985.2095, 6965.3749, 6949.7891, 6948.8330] +25-08-28 10:54:12 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:54:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:54:12 | D | - sum error = [ 6944.2212, 6937.7596, 6949.8449, 6966.5041, 6980.1275] +25-08-28 10:54:12 | D | - best error = [ 6944.2212, 6937.7596, 6937.7596, 6937.7596, 6937.7596] +25-08-28 10:54:12 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:54:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:54:12 | D | - sum error = [ 7015.9923, 7039.5327, 7077.1198, 7111.7860, 7164.8269] +25-08-28 10:54:12 | D | - best error = [ 6937.7596, 6937.7596, 6937.7596, 6937.7596, 6937.7596] +25-08-28 10:54:12 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:54:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:54:12 | D | - sum error = [ 7221.7605, 7284.3893, 7357.6303, 7422.7725, 7501.8648] +25-08-28 10:54:12 | D | - best error = [ 6937.7596, 6937.7596, 6937.7596, 6937.7596, 6937.7596] +25-08-28 10:54:12 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:54:12 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:54:12 | D | - sum error = [ 7287.8598, 7235.0364, 7193.5107, 7157.6667, 7121.8784] +25-08-28 10:54:12 | D | - best error = [ 6937.7596, 6937.7596, 6937.7596, 6937.7596, 6937.7596] +25-08-28 10:54:12 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:54:12 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:54:12 | D | - sum error = [ 7101.0109, 7089.0696, 7076.9994, 7072.3385, 7084.3845] +25-08-28 10:54:12 | D | - best error = [ 6937.7596, 6937.7596, 6937.7596, 6937.7596, 6937.7596] +25-08-28 10:54:12 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:54:12 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:54:12 | D | - sum error = [ 7099.5115, 7118.0680, 7150.2238, 7193.7287, 7224.9482] +25-08-28 10:54:12 | D | - best error = [ 6937.7596, 6937.7596, 6937.7596, 6937.7596, 6937.7596] +25-08-28 10:54:12 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:54:12 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:54:12 | D | - sum error = [ 7291.5647, 7352.7812, 7416.9476, 7491.5622] +25-08-28 10:54:12 | D | - best error = [ 6937.7596, 6937.7596, 6937.7596, 6937.7596] +25-08-28 10:54:12 | D | + error = 6937.7596 +25-08-28 10:54:12 | D | + scale = [min=1.2209, max=2.0101] +25-08-28 10:54:13 | D | - single_transformer_blocks.9.down_proj +25-08-28 10:54:13 | D | + w: sint4 +25-08-28 10:54:13 | D | + x: uint4 +25-08-28 10:54:13 | D | + y: None +25-08-28 10:54:13 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:54:13 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:54:15 | D | + x - AbsMax +25-08-28 10:54:15 | D | + x = [min=0.7656, max=18.2500] +25-08-28 10:54:15 | D | + w - AbsMax +25-08-28 10:54:15 | D | + w = [min=0.1191, max=1.2344] +25-08-28 10:54:15 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:54:18 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:56:26 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:56:26 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:56:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:56:26 | D | - sum error = [ 7932.7082, 7962.9213, 7965.7083, 7993.7831, 8062.1156] +25-08-28 10:56:26 | D | - best error = [ 7932.7082, 7932.7082, 7932.7082, 7932.7082, 7932.7082] +25-08-28 10:56:26 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:56:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:56:26 | D | - sum error = [ 8102.1861, 8153.7140, 8231.6288, 8327.5490, 8421.8022] +25-08-28 10:56:26 | D | - best error = [ 7932.7082, 7932.7082, 7932.7082, 7932.7082, 7932.7082] +25-08-28 10:56:26 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:56:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:56:26 | D | - sum error = [ 8504.7972, 8597.0646, 8713.3580, 8816.9578, 8963.0249] +25-08-28 10:56:26 | D | - best error = [ 7932.7082, 7932.7082, 7932.7082, 7932.7082, 7932.7082] +25-08-28 10:56:26 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:56:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:56:26 | D | - sum error = [ 9108.5863, 9258.5355, 9469.8334, 9632.9435, 9819.1776] +25-08-28 10:56:26 | D | - best error = [ 7932.7082, 7932.7082, 7932.7082, 7932.7082, 7932.7082] +25-08-28 10:56:26 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:56:26 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:56:26 | D | - sum error = [ 8376.1571, 8255.7655, 8162.7392, 8081.3269, 8044.5900] +25-08-28 10:56:26 | D | - best error = [ 7932.7082, 7932.7082, 7932.7082, 7932.7082, 7932.7082] +25-08-28 10:56:26 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:56:26 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:56:26 | D | - sum error = [ 8021.1231, 8038.8019, 8039.3721, 8070.8037, 8113.0137] +25-08-28 10:56:26 | D | - best error = [ 7932.7082, 7932.7082, 7932.7082, 7932.7082, 7932.7082] +25-08-28 10:56:26 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:56:26 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:56:26 | D | - sum error = [ 8218.2519, 8299.7373, 8409.7134, 8525.1163, 8711.5147] +25-08-28 10:56:26 | D | - best error = [ 7932.7082, 7932.7082, 7932.7082, 7932.7082, 7932.7082] +25-08-28 10:56:26 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:56:26 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:56:26 | D | - sum error = [ 8936.7945, 9183.4203, 9459.5750, 9689.0937] +25-08-28 10:56:26 | D | - best error = [ 7932.7082, 7932.7082, 7932.7082, 7932.7082] +25-08-28 10:56:26 | D | + error = 7932.7082 +25-08-28 10:56:26 | D | + scale = [min=1.0000, max=1.0000] +25-08-28 10:56:46 | D | - Smoothing Diffusion Block single_transformer_blocks.10 +25-08-28 10:56:46 | D | - Skipping Module single_transformer_blocks.10.norm.linear +25-08-28 10:56:46 | D | - Smoothing Transformer Block single_transformer_blocks.10 +25-08-28 10:56:46 | D | - single_transformer_blocks.10.attn.qkv_proj + single_transformer_blocks.10.up_proj +25-08-28 10:56:46 | D | + w: sint4 +25-08-28 10:56:46 | D | + x: sint4 +25-08-28 10:56:46 | D | + y: None +25-08-28 10:56:46 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:56:46 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:56:46 | D | + x - AbsMax +25-08-28 10:56:46 | D | + x = [min=0.1221, max=16.5000] +25-08-28 10:56:46 | D | + w - AbsMax +25-08-28 10:56:46 | D | + w = [min=0.0840, max=0.6523] +25-08-28 10:56:46 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:56:48 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:58:53 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:58:53 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:58:53 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:58:53 | D | - sum error = [58314.1769, 56543.8778, 55188.0520, 53566.1123, 52338.3527] +25-08-28 10:58:53 | D | - best error = [58314.1769, 56543.8778, 55188.0520, 53566.1123, 52338.3527] +25-08-28 10:58:53 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:58:53 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:58:53 | D | - sum error = [51246.4090, 50269.5556, 49455.8499, 49775.3854, 48272.3049] +25-08-28 10:58:53 | D | - best error = [51246.4090, 50269.5556, 49455.8499, 49455.8499, 48272.3049] +25-08-28 10:58:53 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:58:53 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:58:53 | D | - sum error = [47982.0393, 47365.6997, 48749.6947, 47123.8464, 46056.8817] +25-08-28 10:58:53 | D | - best error = [47982.0393, 47365.6997, 47365.6997, 47123.8464, 46056.8817] +25-08-28 10:58:53 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:58:53 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:58:53 | D | - sum error = [45974.4402, 45886.0097, 45743.4963, 45968.3021, 46365.4216] +25-08-28 10:58:53 | D | - best error = [45974.4402, 45886.0097, 45743.4963, 45743.4963, 45743.4963] +25-08-28 10:58:53 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:58:53 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:58:53 | D | - sum error = [81816.8507, 78493.0158, 75769.2902, 71037.9063, 66262.6280] +25-08-28 10:58:53 | D | - best error = [45743.4963, 45743.4963, 45743.4963, 45743.4963, 45743.4963] +25-08-28 10:58:53 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:58:53 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:58:53 | D | - sum error = [62910.9807, 60357.7367, 57307.2114, 55190.0698, 53686.7525] +25-08-28 10:58:53 | D | - best error = [45743.4963, 45743.4963, 45743.4963, 45743.4963, 45743.4963] +25-08-28 10:58:53 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:58:53 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:58:53 | D | - sum error = [52999.3884, 51165.7235, 52654.2230, 48866.5016, 48526.8315] +25-08-28 10:58:53 | D | - best error = [45743.4963, 45743.4963, 45743.4963, 45743.4963, 45743.4963] +25-08-28 10:58:53 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:58:53 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:58:53 | D | - sum error = [47187.5672, 46716.9150, 46559.3952, 46625.0679] +25-08-28 10:58:53 | D | - best error = [45743.4963, 45743.4963, 45743.4963, 45743.4963] +25-08-28 10:58:53 | D | + error = 45743.4963 +25-08-28 10:58:53 | D | + scale = [min=0.1673, max=10.8358] +25-08-28 10:58:53 | D | - single_transformer_blocks.10.attn.out_proj +25-08-28 10:58:53 | D | + w: sint4 +25-08-28 10:58:53 | D | + x: sint4 +25-08-28 10:58:53 | D | + y: None +25-08-28 10:58:53 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:58:53 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:58:54 | D | + x - AbsMax +25-08-28 10:58:54 | D | + x = [min=1.5781, max=9.3125] +25-08-28 10:58:54 | D | + w - AbsMax +25-08-28 10:58:54 | D | + w = [min=0.1177, max=0.3691] +25-08-28 10:58:54 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:58:55 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 10:59:54 | D | - x / w range = AbsMax / AbsMax +25-08-28 10:59:54 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 10:59:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:59:54 | D | - sum error = [ 6570.0164, 6540.2122, 6524.3654, 6518.8677, 6514.3172] +25-08-28 10:59:54 | D | - best error = [ 6570.0164, 6540.2122, 6524.3654, 6518.8677, 6514.3172] +25-08-28 10:59:54 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 10:59:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:59:54 | D | - sum error = [ 6507.4219, 6510.8795, 6512.4376, 6507.2784, 6502.9098] +25-08-28 10:59:54 | D | - best error = [ 6507.4219, 6507.4219, 6507.4219, 6507.2784, 6502.9098] +25-08-28 10:59:54 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 10:59:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:59:54 | D | - sum error = [ 6518.9680, 6539.6070, 6566.3609, 6592.4241, 6622.1450] +25-08-28 10:59:54 | D | - best error = [ 6502.9098, 6502.9098, 6502.9098, 6502.9098, 6502.9098] +25-08-28 10:59:54 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:59:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 10:59:54 | D | - sum error = [ 6663.9772, 6682.2005, 6728.1308, 6765.9728, 6811.7575] +25-08-28 10:59:54 | D | - best error = [ 6502.9098, 6502.9098, 6502.9098, 6502.9098, 6502.9098] +25-08-28 10:59:54 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 10:59:54 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 10:59:54 | D | - sum error = [ 6960.6278, 6898.0536, 6847.2850, 6792.8875, 6747.3559] +25-08-28 10:59:54 | D | - best error = [ 6502.9098, 6502.9098, 6502.9098, 6502.9098, 6502.9098] +25-08-28 10:59:54 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 10:59:54 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 10:59:54 | D | - sum error = [ 6716.4136, 6687.0846, 6668.5234, 6664.3208, 6651.8406] +25-08-28 10:59:54 | D | - best error = [ 6502.9098, 6502.9098, 6502.9098, 6502.9098, 6502.9098] +25-08-28 10:59:54 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 10:59:54 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 10:59:54 | D | - sum error = [ 6645.5820, 6637.3877, 6640.1443, 6657.2162, 6684.3384] +25-08-28 10:59:54 | D | - best error = [ 6502.9098, 6502.9098, 6502.9098, 6502.9098, 6502.9098] +25-08-28 10:59:54 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 10:59:54 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 10:59:54 | D | - sum error = [ 6705.7089, 6719.2666, 6762.0828, 6810.9203] +25-08-28 10:59:54 | D | - best error = [ 6502.9098, 6502.9098, 6502.9098, 6502.9098] +25-08-28 10:59:54 | D | + error = 6502.9098 +25-08-28 10:59:54 | D | + scale = [min=1.2279, max=2.7295] +25-08-28 10:59:54 | D | - single_transformer_blocks.10.down_proj +25-08-28 10:59:54 | D | + w: sint4 +25-08-28 10:59:54 | D | + x: uint4 +25-08-28 10:59:54 | D | + y: None +25-08-28 10:59:54 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 10:59:54 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 10:59:57 | D | + x - AbsMax +25-08-28 10:59:57 | D | + x = [min=1.3750, max=13.5000] +25-08-28 10:59:57 | D | + w - AbsMax +25-08-28 10:59:57 | D | + w = [min=0.1187, max=1.1094] +25-08-28 10:59:57 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 10:59:59 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 11:02:01 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:02:01 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:02:01 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:02:01 | D | - sum error = [ 8467.2508, 8449.4677, 8492.0942, 8500.2795, 8568.0663] +25-08-28 11:02:01 | D | - best error = [ 8467.2508, 8449.4677, 8449.4677, 8449.4677, 8449.4677] +25-08-28 11:02:01 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:02:01 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:02:01 | D | - sum error = [ 8619.7550, 8663.1680, 8745.2323, 8805.8381, 8916.1803] +25-08-28 11:02:01 | D | - best error = [ 8449.4677, 8449.4677, 8449.4677, 8449.4677, 8449.4677] +25-08-28 11:02:01 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:02:01 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:02:01 | D | - sum error = [ 9058.4895, 9165.3818, 9289.2695, 9411.7544, 9537.4638] +25-08-28 11:02:01 | D | - best error = [ 8449.4677, 8449.4677, 8449.4677, 8449.4677, 8449.4677] +25-08-28 11:02:01 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:02:01 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:02:01 | D | - sum error = [ 9694.2152, 9837.6062, 9987.4689, 10176.2748, 10361.6327] +25-08-28 11:02:01 | D | - best error = [ 8449.4677, 8449.4677, 8449.4677, 8449.4677, 8449.4677] +25-08-28 11:02:01 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:02:01 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:02:01 | D | - sum error = [ 9589.6586, 9357.1239, 9169.6901, 9014.8926, 8886.9414] +25-08-28 11:02:01 | D | - best error = [ 8449.4677, 8449.4677, 8449.4677, 8449.4677, 8449.4677] +25-08-28 11:02:01 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:02:01 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:02:01 | D | - sum error = [ 8810.0234, 8757.9715, 8756.7012, 8726.2913, 8808.2816] +25-08-28 11:02:01 | D | - best error = [ 8449.4677, 8449.4677, 8449.4677, 8449.4677, 8449.4677] +25-08-28 11:02:01 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:02:01 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:02:01 | D | - sum error = [ 8880.2761, 8920.3946, 9060.2406, 9177.1508, 9355.9145] +25-08-28 11:02:01 | D | - best error = [ 8449.4677, 8449.4677, 8449.4677, 8449.4677, 8449.4677] +25-08-28 11:02:01 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:02:01 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:02:01 | D | - sum error = [ 9558.1170, 9782.1204, 9968.1854, 10266.1709] +25-08-28 11:02:01 | D | - best error = [ 8449.4677, 8449.4677, 8449.4677, 8449.4677] +25-08-28 11:02:01 | D | + error = 8449.4677 +25-08-28 11:02:01 | D | + scale = [min=1.0161, max=1.1390] +25-08-28 11:02:22 | D | - Smoothing Diffusion Block single_transformer_blocks.11 +25-08-28 11:02:22 | D | - Skipping Module single_transformer_blocks.11.norm.linear +25-08-28 11:02:22 | D | - Smoothing Transformer Block single_transformer_blocks.11 +25-08-28 11:02:22 | D | - single_transformer_blocks.11.attn.qkv_proj + single_transformer_blocks.11.up_proj +25-08-28 11:02:22 | D | + w: sint4 +25-08-28 11:02:22 | D | + x: sint4 +25-08-28 11:02:22 | D | + y: None +25-08-28 11:02:22 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:02:22 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:02:22 | D | + x - AbsMax +25-08-28 11:02:22 | D | + x = [min=0.1553, max=21.7500] +25-08-28 11:02:22 | D | + w - AbsMax +25-08-28 11:02:22 | D | + w = [min=0.0679, max=0.6641] +25-08-28 11:02:22 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 11:02:24 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 11:04:30 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:04:30 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:04:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:04:30 | D | - sum error = [60338.5605, 59154.1704, 57738.5445, 57311.6471, 55239.5823] +25-08-28 11:04:30 | D | - best error = [60338.5605, 59154.1704, 57738.5445, 57311.6471, 55239.5823] +25-08-28 11:04:30 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:04:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:04:30 | D | - sum error = [54695.5314, 54182.6411, 53130.7290, 52466.7056, 51613.0935] +25-08-28 11:04:30 | D | - best error = [54695.5314, 54182.6411, 53130.7290, 52466.7056, 51613.0935] +25-08-28 11:04:30 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:04:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:04:30 | D | - sum error = [50970.6714, 50541.0547, 50224.0943, 50041.2988, 50214.1845] +25-08-28 11:04:30 | D | - best error = [50970.6714, 50541.0547, 50224.0943, 50041.2988, 50041.2988] +25-08-28 11:04:30 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:04:30 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:04:30 | D | - sum error = [50742.4127, 50194.3002, 50304.2824, 50563.9897, 51535.9553] +25-08-28 11:04:30 | D | - best error = [50041.2988, 50041.2988, 50041.2988, 50041.2988, 50041.2988] +25-08-28 11:04:30 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:04:30 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:04:30 | D | - sum error = [73646.8964, 71101.1449, 68526.7093, 66119.4381, 64514.4365] +25-08-28 11:04:30 | D | - best error = [50041.2988, 50041.2988, 50041.2988, 50041.2988, 50041.2988] +25-08-28 11:04:30 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:04:30 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:04:30 | D | - sum error = [61901.8330, 60094.6943, 58175.3223, 56601.0126, 54707.0347] +25-08-28 11:04:30 | D | - best error = [50041.2988, 50041.2988, 50041.2988, 50041.2988, 50041.2988] +25-08-28 11:04:30 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:04:30 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:04:30 | D | - sum error = [53749.6633, 53053.8263, 52180.8817, 51704.8658, 51687.1599] +25-08-28 11:04:30 | D | - best error = [50041.2988, 50041.2988, 50041.2988, 50041.2988, 50041.2988] +25-08-28 11:04:30 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:04:30 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:04:30 | D | - sum error = [51488.4002, 51412.4194, 51114.7666, 52055.2322] +25-08-28 11:04:30 | D | - best error = [50041.2988, 50041.2988, 50041.2988, 50041.2988] +25-08-28 11:04:30 | D | + error = 50041.2988 +25-08-28 11:04:30 | D | + scale = [min=0.2980, max=7.4020] +25-08-28 11:04:31 | D | - single_transformer_blocks.11.attn.out_proj +25-08-28 11:04:31 | D | + w: sint4 +25-08-28 11:04:31 | D | + x: sint4 +25-08-28 11:04:31 | D | + y: None +25-08-28 11:04:31 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:04:31 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:04:32 | D | + x - AbsMax +25-08-28 11:04:32 | D | + x = [min=1.3125, max=8.4375] +25-08-28 11:04:32 | D | + w - AbsMax +25-08-28 11:04:32 | D | + w = [min=0.1260, max=0.4004] +25-08-28 11:04:32 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 11:04:33 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 11:05:35 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:05:35 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:05:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:05:35 | D | - sum error = [ 6370.1859, 6354.4967, 6334.8585, 6339.0599, 6324.1306] +25-08-28 11:05:35 | D | - best error = [ 6370.1859, 6354.4967, 6334.8585, 6334.8585, 6324.1306] +25-08-28 11:05:35 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:05:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:05:35 | D | - sum error = [ 6322.1936, 6320.1320, 6325.6393, 6326.4581, 6342.2558] +25-08-28 11:05:35 | D | - best error = [ 6322.1936, 6320.1320, 6320.1320, 6320.1320, 6320.1320] +25-08-28 11:05:35 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:05:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:05:35 | D | - sum error = [ 6362.5165, 6386.0833, 6405.8623, 6432.0740, 6459.9483] +25-08-28 11:05:35 | D | - best error = [ 6320.1320, 6320.1320, 6320.1320, 6320.1320, 6320.1320] +25-08-28 11:05:35 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:05:35 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:05:35 | D | - sum error = [ 6506.1894, 6546.4586, 6590.9814, 6639.2469, 6679.9805] +25-08-28 11:05:35 | D | - best error = [ 6320.1320, 6320.1320, 6320.1320, 6320.1320, 6320.1320] +25-08-28 11:05:35 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:05:35 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:05:35 | D | - sum error = [ 6838.0673, 6765.7443, 6705.0834, 6651.8476, 6598.9181] +25-08-28 11:05:35 | D | - best error = [ 6320.1320, 6320.1320, 6320.1320, 6320.1320, 6320.1320] +25-08-28 11:05:35 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:05:35 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:05:35 | D | - sum error = [ 6552.0665, 6526.7582, 6502.1095, 6485.5014, 6473.6815] +25-08-28 11:05:35 | D | - best error = [ 6320.1320, 6320.1320, 6320.1320, 6320.1320, 6320.1320] +25-08-28 11:05:35 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:05:35 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:05:35 | D | - sum error = [ 6473.6216, 6476.7869, 6482.1379, 6498.4731, 6523.7814] +25-08-28 11:05:35 | D | - best error = [ 6320.1320, 6320.1320, 6320.1320, 6320.1320, 6320.1320] +25-08-28 11:05:35 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:05:35 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:05:35 | D | - sum error = [ 6555.6916, 6591.2654, 6641.2321, 6681.5965] +25-08-28 11:05:35 | D | - best error = [ 6320.1320, 6320.1320, 6320.1320, 6320.1320] +25-08-28 11:05:35 | D | + error = 6320.1320 +25-08-28 11:05:35 | D | + scale = [min=1.0850, max=1.8961] +25-08-28 11:05:35 | D | - single_transformer_blocks.11.down_proj +25-08-28 11:05:35 | D | + w: sint4 +25-08-28 11:05:35 | D | + x: uint4 +25-08-28 11:05:35 | D | + y: None +25-08-28 11:05:35 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:05:35 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:05:39 | D | + x - AbsMax +25-08-28 11:05:39 | D | + x = [min=1.8359, max=13.5000] +25-08-28 11:05:39 | D | + w - AbsMax +25-08-28 11:05:39 | D | + w = [min=0.1143, max=1.0469] +25-08-28 11:05:39 | D | + finished resetting calibrator, ram usage: 18.3 +25-08-28 11:05:41 | D | + finished calculating the original outputs, ram usage: 18.3 +25-08-28 11:07:41 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:07:41 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:07:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:07:41 | D | - sum error = [ 8766.4086, 8759.8183, 8769.8379, 8790.3101, 8842.8430] +25-08-28 11:07:41 | D | - best error = [ 8766.4086, 8759.8183, 8759.8183, 8759.8183, 8759.8183] +25-08-28 11:07:41 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:07:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:07:41 | D | - sum error = [ 8882.9590, 8942.7329, 8984.0499, 9035.7468, 9112.6778] +25-08-28 11:07:41 | D | - best error = [ 8759.8183, 8759.8183, 8759.8183, 8759.8183, 8759.8183] +25-08-28 11:07:41 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:07:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:07:41 | D | - sum error = [ 9179.7595, 9299.5931, 9448.3243, 9565.7577, 9703.0053] +25-08-28 11:07:41 | D | - best error = [ 8759.8183, 8759.8183, 8759.8183, 8759.8183, 8759.8183] +25-08-28 11:07:41 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:07:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:07:41 | D | - sum error = [ 9838.5397, 10066.6876, 10243.9668, 10395.0065, 10601.0603] +25-08-28 11:07:41 | D | - best error = [ 8759.8183, 8759.8183, 8759.8183, 8759.8183, 8759.8183] +25-08-28 11:07:41 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:07:41 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:07:41 | D | - sum error = [10072.6707, 9821.8735, 9602.8533, 9415.4647, 9245.7765] +25-08-28 11:07:41 | D | - best error = [ 8759.8183, 8759.8183, 8759.8183, 8759.8183, 8759.8183] +25-08-28 11:07:41 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:07:41 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:07:41 | D | - sum error = [ 9156.1092, 9083.6451, 9013.3168, 8989.9077, 9023.9880] +25-08-28 11:07:41 | D | - best error = [ 8759.8183, 8759.8183, 8759.8183, 8759.8183, 8759.8183] +25-08-28 11:07:41 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:07:41 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:07:41 | D | - sum error = [ 9048.2272, 9145.5712, 9239.2069, 9345.9189, 9492.0185] +25-08-28 11:07:41 | D | - best error = [ 8759.8183, 8759.8183, 8759.8183, 8759.8183, 8759.8183] +25-08-28 11:07:41 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:07:41 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:07:41 | D | - sum error = [ 9705.9726, 9945.2445, 10198.8384, 10492.2573] +25-08-28 11:07:41 | D | - best error = [ 8759.8183, 8759.8183, 8759.8183, 8759.8183] +25-08-28 11:07:41 | D | + error = 8759.8183 +25-08-28 11:07:41 | D | + scale = [min=1.0308, max=1.1390] +25-08-28 11:08:02 | D | - Smoothing Diffusion Block single_transformer_blocks.12 +25-08-28 11:08:02 | D | - Skipping Module single_transformer_blocks.12.norm.linear +25-08-28 11:08:02 | D | - Smoothing Transformer Block single_transformer_blocks.12 +25-08-28 11:08:02 | D | - single_transformer_blocks.12.attn.qkv_proj + single_transformer_blocks.12.up_proj +25-08-28 11:08:02 | D | + w: sint4 +25-08-28 11:08:02 | D | + x: sint4 +25-08-28 11:08:02 | D | + y: None +25-08-28 11:08:02 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:08:02 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:08:03 | D | + x - AbsMax +25-08-28 11:08:03 | D | + x = [min=0.1387, max=22.0000] +25-08-28 11:08:03 | D | + w - AbsMax +25-08-28 11:08:03 | D | + w = [min=0.0728, max=0.8086] +25-08-28 11:08:03 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 11:08:04 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 11:10:09 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:10:09 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:10:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:10:09 | D | - sum error = [66872.2216, 65190.3463, 64729.2730, 62198.9993, 60992.3568] +25-08-28 11:10:09 | D | - best error = [66872.2216, 65190.3463, 64729.2730, 62198.9993, 60992.3568] +25-08-28 11:10:09 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:10:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:10:09 | D | - sum error = [59382.0681, 58390.5636, 58283.8933, 56421.6719, 56385.3394] +25-08-28 11:10:09 | D | - best error = [59382.0681, 58390.5636, 58283.8933, 56421.6719, 56385.3394] +25-08-28 11:10:09 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:10:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:10:09 | D | - sum error = [55185.9460, 54569.4655, 54302.8780, 53732.4334, 53471.1334] +25-08-28 11:10:09 | D | - best error = [55185.9460, 54569.4655, 54302.8780, 53732.4334, 53471.1334] +25-08-28 11:10:09 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:10:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:10:09 | D | - sum error = [53255.6821, 53613.9803, 54038.9098, 53818.3897, 54096.9843] +25-08-28 11:10:09 | D | - best error = [53255.6821, 53255.6821, 53255.6821, 53255.6821, 53255.6821] +25-08-28 11:10:09 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:10:09 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:10:09 | D | - sum error = [82522.1122, 79290.2500, 75975.0426, 72989.2994, 70400.9854] +25-08-28 11:10:09 | D | - best error = [53255.6821, 53255.6821, 53255.6821, 53255.6821, 53255.6821] +25-08-28 11:10:09 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:10:09 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:10:09 | D | - sum error = [68159.6207, 66177.5087, 64116.9172, 62832.3402, 60150.0822] +25-08-28 11:10:09 | D | - best error = [53255.6821, 53255.6821, 53255.6821, 53255.6821, 53255.6821] +25-08-28 11:10:09 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:10:09 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:10:09 | D | - sum error = [59757.0055, 58143.3440, 57192.3754, 56588.1729, 55103.8374] +25-08-28 11:10:09 | D | - best error = [53255.6821, 53255.6821, 53255.6821, 53255.6821, 53255.6821] +25-08-28 11:10:09 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:10:09 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:10:09 | D | - sum error = [54910.5035, 54407.1666, 54279.9216, 54550.5165] +25-08-28 11:10:09 | D | - best error = [53255.6821, 53255.6821, 53255.6821, 53255.6821] +25-08-28 11:10:09 | D | + error = 53255.6821 +25-08-28 11:10:09 | D | + scale = [min=0.2272, max=10.1582] +25-08-28 11:10:09 | D | - single_transformer_blocks.12.attn.out_proj +25-08-28 11:10:09 | D | + w: sint4 +25-08-28 11:10:09 | D | + x: sint4 +25-08-28 11:10:09 | D | + y: None +25-08-28 11:10:09 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:10:09 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:10:10 | D | + x - AbsMax +25-08-28 11:10:10 | D | + x = [min=1.5000, max=7.2188] +25-08-28 11:10:10 | D | + w - AbsMax +25-08-28 11:10:10 | D | + w = [min=0.1187, max=0.3809] +25-08-28 11:10:10 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 11:10:11 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 11:11:08 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:11:08 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:11:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:11:08 | D | - sum error = [ 5980.3030, 5955.1628, 5944.1965, 5946.2936, 5924.4105] +25-08-28 11:11:08 | D | - best error = [ 5980.3030, 5955.1628, 5944.1965, 5944.1965, 5924.4105] +25-08-28 11:11:08 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:11:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:11:08 | D | - sum error = [ 5919.5507, 5926.3334, 5925.2679, 5935.6523, 5950.7640] +25-08-28 11:11:08 | D | - best error = [ 5919.5507, 5919.5507, 5919.5507, 5919.5507, 5919.5507] +25-08-28 11:11:08 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:11:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:11:08 | D | - sum error = [ 5970.9517, 5985.7885, 6014.0356, 6030.7807, 6061.0732] +25-08-28 11:11:08 | D | - best error = [ 5919.5507, 5919.5507, 5919.5507, 5919.5507, 5919.5507] +25-08-28 11:11:08 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:11:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:11:08 | D | - sum error = [ 6086.5563, 6124.7886, 6169.9449, 6211.8751, 6258.0300] +25-08-28 11:11:08 | D | - best error = [ 5919.5507, 5919.5507, 5919.5507, 5919.5507, 5919.5507] +25-08-28 11:11:08 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:11:08 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:11:08 | D | - sum error = [ 6567.5910, 6489.3007, 6411.5580, 6340.4221, 6271.1182] +25-08-28 11:11:08 | D | - best error = [ 5919.5507, 5919.5507, 5919.5507, 5919.5507, 5919.5507] +25-08-28 11:11:08 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:11:08 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:11:08 | D | - sum error = [ 6217.1376, 6171.9436, 6127.9155, 6099.5764, 6081.1262] +25-08-28 11:11:08 | D | - best error = [ 5919.5507, 5919.5507, 5919.5507, 5919.5507, 5919.5507] +25-08-28 11:11:08 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:11:08 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:11:08 | D | - sum error = [ 6074.4284, 6078.6666, 6085.6894, 6102.4218, 6116.2740] +25-08-28 11:11:08 | D | - best error = [ 5919.5507, 5919.5507, 5919.5507, 5919.5507, 5919.5507] +25-08-28 11:11:08 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:11:08 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:11:08 | D | - sum error = [ 6142.2634, 6168.1517, 6219.0588, 6261.8151] +25-08-28 11:11:08 | D | - best error = [ 5919.5507, 5919.5507, 5919.5507, 5919.5507] +25-08-28 11:11:08 | D | + error = 5919.5507 +25-08-28 11:11:08 | D | + scale = [min=1.1067, max=1.6391] +25-08-28 11:11:08 | D | - single_transformer_blocks.12.down_proj +25-08-28 11:11:08 | D | + w: sint4 +25-08-28 11:11:08 | D | + x: uint4 +25-08-28 11:11:08 | D | + y: None +25-08-28 11:11:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:11:08 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:11:10 | D | + x - AbsMax +25-08-28 11:11:10 | D | + x = [min=1.7969, max=13.7500] +25-08-28 11:11:10 | D | + w - AbsMax +25-08-28 11:11:10 | D | + w = [min=0.1123, max=0.9531] +25-08-28 11:11:10 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 11:11:12 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 11:12:57 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:12:57 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:12:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:12:57 | D | - sum error = [ 8142.9275, 8132.7599, 8141.9671, 8190.0286, 8254.6651] +25-08-28 11:12:57 | D | - best error = [ 8142.9275, 8132.7599, 8132.7599, 8132.7599, 8132.7599] +25-08-28 11:12:57 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:12:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:12:57 | D | - sum error = [ 8293.9805, 8337.3627, 8401.0685, 8502.2764, 8561.8848] +25-08-28 11:12:57 | D | - best error = [ 8132.7599, 8132.7599, 8132.7599, 8132.7599, 8132.7599] +25-08-28 11:12:57 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:12:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:12:57 | D | - sum error = [ 8665.5179, 8785.4575, 8928.9171, 9030.1987, 9189.5510] +25-08-28 11:12:57 | D | - best error = [ 8132.7599, 8132.7599, 8132.7599, 8132.7599, 8132.7599] +25-08-28 11:12:57 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:12:57 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:12:57 | D | - sum error = [ 9344.4987, 9439.0868, 9596.3085, 9801.0926, 10011.7930] +25-08-28 11:12:57 | D | - best error = [ 8132.7599, 8132.7599, 8132.7599, 8132.7599, 8132.7599] +25-08-28 11:12:57 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:12:57 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:12:57 | D | - sum error = [ 9000.8800, 8835.4242, 8658.4045, 8554.9953, 8468.6103] +25-08-28 11:12:57 | D | - best error = [ 8132.7599, 8132.7599, 8132.7599, 8132.7599, 8132.7599] +25-08-28 11:12:57 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:12:57 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:12:57 | D | - sum error = [ 8440.8009, 8400.7138, 8410.1106, 8449.6026, 8457.1352] +25-08-28 11:12:57 | D | - best error = [ 8132.7599, 8132.7599, 8132.7599, 8132.7599, 8132.7599] +25-08-28 11:12:57 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:12:57 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:12:57 | D | - sum error = [ 8488.4595, 8575.0830, 8659.1796, 8832.7771, 9010.8313] +25-08-28 11:12:57 | D | - best error = [ 8132.7599, 8132.7599, 8132.7599, 8132.7599, 8132.7599] +25-08-28 11:12:57 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:12:57 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:12:57 | D | - sum error = [ 9149.0865, 9424.1061, 9665.7879, 9927.9871] +25-08-28 11:12:57 | D | - best error = [ 8132.7599, 8132.7599, 8132.7599, 8132.7599] +25-08-28 11:12:57 | D | + error = 8132.7599 +25-08-28 11:12:57 | D | + scale = [min=1.0297, max=1.1400] +25-08-28 11:13:15 | D | - Smoothing Diffusion Block single_transformer_blocks.13 +25-08-28 11:13:15 | D | - Skipping Module single_transformer_blocks.13.norm.linear +25-08-28 11:13:15 | D | - Smoothing Transformer Block single_transformer_blocks.13 +25-08-28 11:13:15 | D | - single_transformer_blocks.13.attn.qkv_proj + single_transformer_blocks.13.up_proj +25-08-28 11:13:15 | D | + w: sint4 +25-08-28 11:13:15 | D | + x: sint4 +25-08-28 11:13:15 | D | + y: None +25-08-28 11:13:15 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:13:15 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:13:16 | D | + x - AbsMax +25-08-28 11:13:16 | D | + x = [min=0.2021, max=24.0000] +25-08-28 11:13:16 | D | + w - AbsMax +25-08-28 11:13:16 | D | + w = [min=0.0679, max=0.8555] +25-08-28 11:13:16 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 11:13:17 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 11:15:22 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:15:22 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:15:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:15:22 | D | - sum error = [71152.3436, 69643.3263, 68272.1031, 67275.1427, 66498.8411] +25-08-28 11:15:22 | D | - best error = [71152.3436, 69643.3263, 68272.1031, 67275.1427, 66498.8411] +25-08-28 11:15:22 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:15:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:15:22 | D | - sum error = [65028.6691, 64257.3147, 63661.9391, 62997.2098, 62594.2065] +25-08-28 11:15:22 | D | - best error = [65028.6691, 64257.3147, 63661.9391, 62997.2098, 62594.2065] +25-08-28 11:15:22 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:15:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:15:22 | D | - sum error = [62797.2259, 61826.7411, 62449.5721, 61747.6381, 61382.3855] +25-08-28 11:15:22 | D | - best error = [62594.2065, 61826.7411, 61826.7411, 61747.6381, 61382.3855] +25-08-28 11:15:22 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:15:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:15:22 | D | - sum error = [61381.1426, 61452.9736, 61556.3877, 61611.2675, 62068.1927] +25-08-28 11:15:22 | D | - best error = [61381.1426, 61381.1426, 61381.1426, 61381.1426, 61381.1426] +25-08-28 11:15:22 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:15:22 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:15:22 | D | - sum error = [113933.2487, 105881.7284, 101872.5466, 94508.1848, 88401.8288] +25-08-28 11:15:22 | D | - best error = [61381.1426, 61381.1426, 61381.1426, 61381.1426, 61381.1426] +25-08-28 11:15:22 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:15:22 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:15:22 | D | - sum error = [85836.7448, 80995.1955, 77613.1981, 75953.6398, 73096.5599] +25-08-28 11:15:22 | D | - best error = [61381.1426, 61381.1426, 61381.1426, 61381.1426, 61381.1426] +25-08-28 11:15:22 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:15:22 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:15:22 | D | - sum error = [70819.0510, 69095.0918, 67085.0636, 65358.5743, 64274.5013] +25-08-28 11:15:22 | D | - best error = [61381.1426, 61381.1426, 61381.1426, 61381.1426, 61381.1426] +25-08-28 11:15:22 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:15:22 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:15:22 | D | - sum error = [63678.8864, 63585.5259, 62673.4460, 62621.6474] +25-08-28 11:15:22 | D | - best error = [61381.1426, 61381.1426, 61381.1426, 61381.1426] +25-08-28 11:15:22 | D | + error = 61381.1426 +25-08-28 11:15:22 | D | + scale = [min=0.3015, max=10.8432] +25-08-28 11:15:22 | D | - single_transformer_blocks.13.attn.out_proj +25-08-28 11:15:22 | D | + w: sint4 +25-08-28 11:15:22 | D | + x: sint4 +25-08-28 11:15:22 | D | + y: None +25-08-28 11:15:22 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:15:22 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:15:22 | D | + x - AbsMax +25-08-28 11:15:22 | D | + x = [min=1.4297, max=10.1250] +25-08-28 11:15:22 | D | + w - AbsMax +25-08-28 11:15:22 | D | + w = [min=0.1191, max=0.3945] +25-08-28 11:15:22 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 11:15:23 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 11:16:20 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:16:20 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:16:20 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:16:20 | D | - sum error = [ 7694.9118, 7668.9261, 7636.5021, 7605.1409, 7578.3051] +25-08-28 11:16:20 | D | - best error = [ 7694.9118, 7668.9261, 7636.5021, 7605.1409, 7578.3051] +25-08-28 11:16:20 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:16:20 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:16:20 | D | - sum error = [ 7571.9089, 7551.9717, 7549.9515, 7541.3240, 7545.3403] +25-08-28 11:16:20 | D | - best error = [ 7571.9089, 7551.9717, 7549.9515, 7541.3240, 7541.3240] +25-08-28 11:16:20 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:16:20 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:16:20 | D | - sum error = [ 7555.8502, 7553.1045, 7584.5005, 7600.1103, 7634.3602] +25-08-28 11:16:20 | D | - best error = [ 7541.3240, 7541.3240, 7541.3240, 7541.3240, 7541.3240] +25-08-28 11:16:20 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:16:20 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:16:20 | D | - sum error = [ 7669.4545, 7704.1625, 7743.2352, 7792.3065, 7843.7677] +25-08-28 11:16:20 | D | - best error = [ 7541.3240, 7541.3240, 7541.3240, 7541.3240, 7541.3240] +25-08-28 11:16:20 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:16:20 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:16:20 | D | - sum error = [ 8319.5687, 8206.8191, 8106.9420, 8009.1965, 7928.8143] +25-08-28 11:16:20 | D | - best error = [ 7541.3240, 7541.3240, 7541.3240, 7541.3240, 7541.3240] +25-08-28 11:16:20 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:16:20 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:16:20 | D | - sum error = [ 7848.8975, 7788.6176, 7754.3693, 7728.1848, 7698.6591] +25-08-28 11:16:20 | D | - best error = [ 7541.3240, 7541.3240, 7541.3240, 7541.3240, 7541.3240] +25-08-28 11:16:20 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:16:20 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:16:20 | D | - sum error = [ 7675.5466, 7649.1644, 7649.6698, 7678.7806, 7693.3925] +25-08-28 11:16:20 | D | - best error = [ 7541.3240, 7541.3240, 7541.3240, 7541.3240, 7541.3240] +25-08-28 11:16:20 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:16:20 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:16:20 | D | - sum error = [ 7713.0645, 7746.4619, 7780.1491, 7834.6454] +25-08-28 11:16:20 | D | - best error = [ 7541.3240, 7541.3240, 7541.3240, 7541.3240] +25-08-28 11:16:20 | D | + error = 7541.3240 +25-08-28 11:16:20 | D | + scale = [min=1.1537, max=2.5244] +25-08-28 11:16:20 | D | - single_transformer_blocks.13.down_proj +25-08-28 11:16:20 | D | + w: sint4 +25-08-28 11:16:20 | D | + x: uint4 +25-08-28 11:16:20 | D | + y: None +25-08-28 11:16:20 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:16:20 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:16:22 | D | + x - AbsMax +25-08-28 11:16:22 | D | + x = [min=1.6719, max=25.6250] +25-08-28 11:16:22 | D | + w - AbsMax +25-08-28 11:16:22 | D | + w = [min=0.1172, max=1.0078] +25-08-28 11:16:22 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 11:16:24 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 11:18:08 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:18:08 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:18:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:18:08 | D | - sum error = [ 8442.1850, 8470.3275, 8459.5168, 8499.6047, 8537.4221] +25-08-28 11:18:08 | D | - best error = [ 8442.1850, 8442.1850, 8442.1850, 8442.1850, 8442.1850] +25-08-28 11:18:08 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:18:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:18:08 | D | - sum error = [ 8580.9529, 8658.6174, 8773.6109, 8839.5602, 8893.6337] +25-08-28 11:18:08 | D | - best error = [ 8442.1850, 8442.1850, 8442.1850, 8442.1850, 8442.1850] +25-08-28 11:18:08 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:18:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:18:08 | D | - sum error = [ 8936.2028, 9085.9135, 9226.3700, 9366.8919, 9509.7289] +25-08-28 11:18:08 | D | - best error = [ 8442.1850, 8442.1850, 8442.1850, 8442.1850, 8442.1850] +25-08-28 11:18:08 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:18:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:18:08 | D | - sum error = [ 9663.1357, 9789.9818, 9942.7541, 10061.0274, 10294.1039] +25-08-28 11:18:08 | D | - best error = [ 8442.1850, 8442.1850, 8442.1850, 8442.1850, 8442.1850] +25-08-28 11:18:08 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:18:08 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:18:08 | D | - sum error = [ 9303.7949, 9167.5916, 9052.9880, 8949.5703, 8839.7403] +25-08-28 11:18:08 | D | - best error = [ 8442.1850, 8442.1850, 8442.1850, 8442.1850, 8442.1850] +25-08-28 11:18:08 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:18:08 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:18:08 | D | - sum error = [ 8798.5203, 8750.3463, 8770.1754, 8763.0314, 8790.2218] +25-08-28 11:18:08 | D | - best error = [ 8442.1850, 8442.1850, 8442.1850, 8442.1850, 8442.1850] +25-08-28 11:18:08 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:18:08 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:18:08 | D | - sum error = [ 8815.6470, 8897.5457, 9026.9698, 9188.5577, 9364.0214] +25-08-28 11:18:08 | D | - best error = [ 8442.1850, 8442.1850, 8442.1850, 8442.1850, 8442.1850] +25-08-28 11:18:08 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:18:08 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:18:08 | D | - sum error = [ 9525.9717, 9782.0640, 9957.0036, 10176.9316] +25-08-28 11:18:08 | D | - best error = [ 8442.1850, 8442.1850, 8442.1850, 8442.1850] +25-08-28 11:18:08 | D | + error = 8442.1850 +25-08-28 11:18:08 | D | + scale = [min=1.0000, max=1.0000] +25-08-28 11:18:25 | D | - Smoothing Diffusion Block single_transformer_blocks.14 +25-08-28 11:18:25 | D | - Skipping Module single_transformer_blocks.14.norm.linear +25-08-28 11:18:25 | D | - Smoothing Transformer Block single_transformer_blocks.14 +25-08-28 11:18:25 | D | - single_transformer_blocks.14.attn.qkv_proj + single_transformer_blocks.14.up_proj +25-08-28 11:18:25 | D | + w: sint4 +25-08-28 11:18:25 | D | + x: sint4 +25-08-28 11:18:25 | D | + y: None +25-08-28 11:18:25 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:18:25 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:18:26 | D | + x - AbsMax +25-08-28 11:18:26 | D | + x = [min=0.1367, max=30.7500] +25-08-28 11:18:26 | D | + w - AbsMax +25-08-28 11:18:26 | D | + w = [min=0.0845, max=1.1094] +25-08-28 11:18:26 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 11:18:27 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 11:20:32 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:20:32 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:20:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:20:32 | D | - sum error = [77200.4128, 75844.1923, 72859.6925, 71290.3624, 70772.7658] +25-08-28 11:20:32 | D | - best error = [77200.4128, 75844.1923, 72859.6925, 71290.3624, 70772.7658] +25-08-28 11:20:32 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:20:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:20:32 | D | - sum error = [69033.0231, 67010.7927, 66179.8684, 64792.8727, 63125.9495] +25-08-28 11:20:32 | D | - best error = [69033.0231, 67010.7927, 66179.8684, 64792.8727, 63125.9495] +25-08-28 11:20:32 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:20:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:20:32 | D | - sum error = [62422.7917, 61969.6091, 61096.8333, 61220.9533, 61032.9453] +25-08-28 11:20:32 | D | - best error = [62422.7917, 61969.6091, 61096.8333, 61096.8333, 61032.9453] +25-08-28 11:20:32 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:20:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:20:32 | D | - sum error = [60686.7261, 60218.5332, 60522.8864, 61085.2152, 60716.4837] +25-08-28 11:20:32 | D | - best error = [60686.7261, 60218.5332, 60218.5332, 60218.5332, 60218.5332] +25-08-28 11:20:32 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:20:32 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:20:32 | D | - sum error = [98979.0986, 95088.9032, 91168.0618, 87751.4074, 84514.1083] +25-08-28 11:20:32 | D | - best error = [60218.5332, 60218.5332, 60218.5332, 60218.5332, 60218.5332] +25-08-28 11:20:32 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:20:32 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:20:32 | D | - sum error = [80763.3216, 77436.3768, 75319.4354, 71923.9634, 69774.4279] +25-08-28 11:20:32 | D | - best error = [60218.5332, 60218.5332, 60218.5332, 60218.5332, 60218.5332] +25-08-28 11:20:32 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:20:32 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:20:32 | D | - sum error = [68088.7628, 66241.9864, 64492.9238, 62963.0129, 62278.8896] +25-08-28 11:20:32 | D | - best error = [60218.5332, 60218.5332, 60218.5332, 60218.5332, 60218.5332] +25-08-28 11:20:32 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:20:32 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:20:32 | D | - sum error = [61766.5737, 61642.2671, 61375.5912, 61125.2689] +25-08-28 11:20:32 | D | - best error = [60218.5332, 60218.5332, 60218.5332, 60218.5332] +25-08-28 11:20:32 | D | + error = 60218.5332 +25-08-28 11:20:32 | D | + scale = [min=0.2035, max=15.4980] +25-08-28 11:20:32 | D | - single_transformer_blocks.14.attn.out_proj +25-08-28 11:20:32 | D | + w: sint4 +25-08-28 11:20:32 | D | + x: sint4 +25-08-28 11:20:32 | D | + y: None +25-08-28 11:20:32 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:20:32 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:20:33 | D | + x - AbsMax +25-08-28 11:20:33 | D | + x = [min=1.1250, max=8.0000] +25-08-28 11:20:33 | D | + w - AbsMax +25-08-28 11:20:33 | D | + w = [min=0.1240, max=0.4336] +25-08-28 11:20:33 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 11:20:34 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 11:21:32 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:21:32 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:21:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:21:32 | D | - sum error = [ 6760.4680, 6738.4654, 6718.5921, 6714.0670, 6721.2207] +25-08-28 11:21:32 | D | - best error = [ 6760.4680, 6738.4654, 6718.5921, 6714.0670, 6714.0670] +25-08-28 11:21:32 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:21:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:21:32 | D | - sum error = [ 6722.1660, 6733.5857, 6736.8764, 6740.2564, 6764.6907] +25-08-28 11:21:32 | D | - best error = [ 6714.0670, 6714.0670, 6714.0670, 6714.0670, 6714.0670] +25-08-28 11:21:32 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:21:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:21:32 | D | - sum error = [ 6781.0919, 6799.3597, 6826.2904, 6857.6684, 6894.3029] +25-08-28 11:21:32 | D | - best error = [ 6714.0670, 6714.0670, 6714.0670, 6714.0670, 6714.0670] +25-08-28 11:21:32 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:21:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:21:32 | D | - sum error = [ 6940.6665, 6993.7375, 7039.4781, 7094.3851, 7145.3901] +25-08-28 11:21:32 | D | - best error = [ 6714.0670, 6714.0670, 6714.0670, 6714.0670, 6714.0670] +25-08-28 11:21:32 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:21:32 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:21:32 | D | - sum error = [ 7126.6764, 7061.1928, 7000.7560, 6962.5814, 6923.7929] +25-08-28 11:21:32 | D | - best error = [ 6714.0670, 6714.0670, 6714.0670, 6714.0670, 6714.0670] +25-08-28 11:21:32 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:21:32 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:21:32 | D | - sum error = [ 6887.4826, 6862.9836, 6858.9367, 6854.9322, 6856.3827] +25-08-28 11:21:32 | D | - best error = [ 6714.0670, 6714.0670, 6714.0670, 6714.0670, 6714.0670] +25-08-28 11:21:32 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:21:32 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:21:32 | D | - sum error = [ 6860.2988, 6872.5436, 6885.9059, 6909.4848, 6948.2349] +25-08-28 11:21:32 | D | - best error = [ 6714.0670, 6714.0670, 6714.0670, 6714.0670, 6714.0670] +25-08-28 11:21:32 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:21:32 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:21:32 | D | - sum error = [ 6992.4193, 7031.4189, 7088.3411, 7138.8980] +25-08-28 11:21:32 | D | - best error = [ 6714.0670, 6714.0670, 6714.0670, 6714.0670] +25-08-28 11:21:32 | D | + error = 6714.0670 +25-08-28 11:21:32 | D | + scale = [min=1.0178, max=1.3660] +25-08-28 11:21:32 | D | - single_transformer_blocks.14.down_proj +25-08-28 11:21:32 | D | + w: sint4 +25-08-28 11:21:32 | D | + x: uint4 +25-08-28 11:21:32 | D | + y: None +25-08-28 11:21:32 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:21:32 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:21:34 | D | + x - AbsMax +25-08-28 11:21:34 | D | + x = [min=1.6172, max=17.5000] +25-08-28 11:21:34 | D | + w - AbsMax +25-08-28 11:21:34 | D | + w = [min=0.1167, max=1.0391] +25-08-28 11:21:34 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 11:21:36 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 11:23:19 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:23:19 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:23:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:23:19 | D | - sum error = [ 8676.2356, 8697.2253, 8734.9624, 8751.6307, 8822.5647] +25-08-28 11:23:19 | D | - best error = [ 8676.2356, 8676.2356, 8676.2356, 8676.2356, 8676.2356] +25-08-28 11:23:19 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:23:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:23:19 | D | - sum error = [ 8888.3542, 8926.3829, 8969.1141, 8997.8535, 9059.7934] +25-08-28 11:23:19 | D | - best error = [ 8676.2356, 8676.2356, 8676.2356, 8676.2356, 8676.2356] +25-08-28 11:23:19 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:23:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:23:19 | D | - sum error = [ 9207.4370, 9297.6556, 9427.6903, 9532.0369, 9642.7967] +25-08-28 11:23:19 | D | - best error = [ 8676.2356, 8676.2356, 8676.2356, 8676.2356, 8676.2356] +25-08-28 11:23:19 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:23:19 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:23:19 | D | - sum error = [ 9806.4216, 9956.9702, 10099.1770, 10238.7343, 10465.4506] +25-08-28 11:23:19 | D | - best error = [ 8676.2356, 8676.2356, 8676.2356, 8676.2356, 8676.2356] +25-08-28 11:23:19 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:23:19 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:23:19 | D | - sum error = [ 9692.2926, 9489.9155, 9390.7708, 9223.5681, 9095.2709] +25-08-28 11:23:19 | D | - best error = [ 8676.2356, 8676.2356, 8676.2356, 8676.2356, 8676.2356] +25-08-28 11:23:19 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:23:19 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:23:19 | D | - sum error = [ 9084.4443, 8990.8300, 8969.7581, 8989.0551, 8989.1730] +25-08-28 11:23:19 | D | - best error = [ 8676.2356, 8676.2356, 8676.2356, 8676.2356, 8676.2356] +25-08-28 11:23:19 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:23:19 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:23:19 | D | - sum error = [ 9084.3188, 9137.1006, 9259.5253, 9394.7262, 9502.9444] +25-08-28 11:23:19 | D | - best error = [ 8676.2356, 8676.2356, 8676.2356, 8676.2356, 8676.2356] +25-08-28 11:23:19 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:23:19 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:23:19 | D | - sum error = [ 9723.2748, 9984.6270, 10152.9389, 10370.3952] +25-08-28 11:23:19 | D | - best error = [ 8676.2356, 8676.2356, 8676.2356, 8676.2356] +25-08-28 11:23:19 | D | + error = 8676.2356 +25-08-28 11:23:19 | D | + scale = [min=1.0000, max=1.0000] +25-08-28 11:23:37 | D | - Smoothing Diffusion Block single_transformer_blocks.15 +25-08-28 11:23:37 | D | - Skipping Module single_transformer_blocks.15.norm.linear +25-08-28 11:23:37 | D | - Smoothing Transformer Block single_transformer_blocks.15 +25-08-28 11:23:37 | D | - single_transformer_blocks.15.attn.qkv_proj + single_transformer_blocks.15.up_proj +25-08-28 11:23:37 | D | + w: sint4 +25-08-28 11:23:37 | D | + x: sint4 +25-08-28 11:23:37 | D | + y: None +25-08-28 11:23:37 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:23:37 | D | + finished parsing calibration arguments, ram usage: 17.6 +25-08-28 11:23:38 | D | + x - AbsMax +25-08-28 11:23:38 | D | + x = [min=0.2910, max=21.3750] +25-08-28 11:23:38 | D | + w - AbsMax +25-08-28 11:23:38 | D | + w = [min=0.1328, max=0.7812] +25-08-28 11:23:38 | D | + finished resetting calibrator, ram usage: 17.6 +25-08-28 11:23:39 | D | + finished calculating the original outputs, ram usage: 17.8 +25-08-28 11:25:44 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:25:44 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:25:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:25:44 | D | - sum error = [70096.8455, 68030.7620, 66500.6543, 64870.6262, 63798.4847] +25-08-28 11:25:44 | D | - best error = [70096.8455, 68030.7620, 66500.6543, 64870.6262, 63798.4847] +25-08-28 11:25:44 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:25:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:25:44 | D | - sum error = [62893.9343, 61836.4811, 61121.6084, 60484.1099, 60143.9234] +25-08-28 11:25:44 | D | - best error = [62893.9343, 61836.4811, 61121.6084, 60484.1099, 60143.9234] +25-08-28 11:25:44 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:25:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:25:44 | D | - sum error = [59570.0027, 59149.8296, 59312.0604, 59477.2341, 59036.3858] +25-08-28 11:25:44 | D | - best error = [59570.0027, 59149.8296, 59149.8296, 59149.8296, 59036.3858] +25-08-28 11:25:44 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:25:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:25:44 | D | - sum error = [59153.2226, 59080.6429, 59427.9137, 59915.5865, 60325.0863] +25-08-28 11:25:44 | D | - best error = [59036.3858, 59036.3858, 59036.3858, 59036.3858, 59036.3858] +25-08-28 11:25:44 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:25:44 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:25:44 | D | - sum error = [111711.7067, 103043.0012, 96602.1436, 93451.7527, 86916.5360] +25-08-28 11:25:44 | D | - best error = [59036.3858, 59036.3858, 59036.3858, 59036.3858, 59036.3858] +25-08-28 11:25:44 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:25:44 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:25:44 | D | - sum error = [81514.6000, 77087.1897, 73744.1823, 70793.1170, 68479.8093] +25-08-28 11:25:44 | D | - best error = [59036.3858, 59036.3858, 59036.3858, 59036.3858, 59036.3858] +25-08-28 11:25:44 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:25:44 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:25:44 | D | - sum error = [66199.7868, 64249.4817, 62925.7950, 61565.8357, 60897.1056] +25-08-28 11:25:44 | D | - best error = [59036.3858, 59036.3858, 59036.3858, 59036.3858, 59036.3858] +25-08-28 11:25:44 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:25:44 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:25:44 | D | - sum error = [60628.2163, 60359.8956, 60107.9030, 60474.3426] +25-08-28 11:25:44 | D | - best error = [59036.3858, 59036.3858, 59036.3858, 59036.3858] +25-08-28 11:25:44 | D | + error = 59036.3858 +25-08-28 11:25:44 | D | + scale = [min=0.4214, max=8.5297] +25-08-28 11:25:44 | D | - single_transformer_blocks.15.attn.out_proj +25-08-28 11:25:44 | D | + w: sint4 +25-08-28 11:25:44 | D | + x: sint4 +25-08-28 11:25:44 | D | + y: None +25-08-28 11:25:44 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:25:44 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:25:45 | D | + x - AbsMax +25-08-28 11:25:45 | D | + x = [min=1.3750, max=8.1250] +25-08-28 11:25:45 | D | + w - AbsMax +25-08-28 11:25:45 | D | + w = [min=0.1206, max=0.4512] +25-08-28 11:25:45 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 11:25:45 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 11:26:42 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:26:42 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:26:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:26:42 | D | - sum error = [ 7322.7347, 7285.3950, 7251.8985, 7219.9205, 7192.8392] +25-08-28 11:26:42 | D | - best error = [ 7322.7347, 7285.3950, 7251.8985, 7219.9205, 7192.8392] +25-08-28 11:26:42 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:26:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:26:42 | D | - sum error = [ 7172.6001, 7172.0700, 7164.3018, 7161.0751, 7170.3897] +25-08-28 11:26:42 | D | - best error = [ 7172.6001, 7172.0700, 7164.3018, 7161.0751, 7161.0751] +25-08-28 11:26:42 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:26:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:26:42 | D | - sum error = [ 7171.3876, 7188.1221, 7214.9146, 7243.4989, 7271.2695] +25-08-28 11:26:42 | D | - best error = [ 7161.0751, 7161.0751, 7161.0751, 7161.0751, 7161.0751] +25-08-28 11:26:42 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:26:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:26:42 | D | - sum error = [ 7296.1105, 7357.0222, 7392.1748, 7467.0168, 7526.8089] +25-08-28 11:26:42 | D | - best error = [ 7161.0751, 7161.0751, 7161.0751, 7161.0751, 7161.0751] +25-08-28 11:26:42 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:26:42 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:26:42 | D | - sum error = [ 7942.0262, 7819.3381, 7725.3344, 7636.5450, 7568.3018] +25-08-28 11:26:42 | D | - best error = [ 7161.0751, 7161.0751, 7161.0751, 7161.0751, 7161.0751] +25-08-28 11:26:42 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:26:42 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:26:42 | D | - sum error = [ 7499.1922, 7451.1094, 7403.6086, 7362.5270, 7332.9388] +25-08-28 11:26:42 | D | - best error = [ 7161.0751, 7161.0751, 7161.0751, 7161.0751, 7161.0751] +25-08-28 11:26:42 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:26:42 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:26:42 | D | - sum error = [ 7319.4227, 7319.2543, 7318.8007, 7337.4900, 7355.8056] +25-08-28 11:26:42 | D | - best error = [ 7161.0751, 7161.0751, 7161.0751, 7161.0751, 7161.0751] +25-08-28 11:26:42 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:26:42 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:26:42 | D | - sum error = [ 7359.9391, 7410.2383, 7454.6828, 7529.2162] +25-08-28 11:26:42 | D | - best error = [ 7161.0751, 7161.0751, 7161.0751, 7161.0751] +25-08-28 11:26:42 | D | + error = 7161.0751 +25-08-28 11:26:42 | D | + scale = [min=1.1359, max=2.3117] +25-08-28 11:26:42 | D | - single_transformer_blocks.15.down_proj +25-08-28 11:26:42 | D | + w: sint4 +25-08-28 11:26:42 | D | + x: uint4 +25-08-28 11:26:42 | D | + y: None +25-08-28 11:26:42 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:26:42 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:26:44 | D | + x - AbsMax +25-08-28 11:26:44 | D | + x = [min=1.6875, max=18.5000] +25-08-28 11:26:44 | D | + w - AbsMax +25-08-28 11:26:44 | D | + w = [min=0.1167, max=1.2578] +25-08-28 11:26:44 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 11:26:46 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 11:28:34 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:28:34 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:28:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:28:34 | D | - sum error = [ 8657.3474, 8673.3606, 8685.7529, 8739.7712, 8802.7946] +25-08-28 11:28:34 | D | - best error = [ 8657.3474, 8657.3474, 8657.3474, 8657.3474, 8657.3474] +25-08-28 11:28:34 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:28:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:28:34 | D | - sum error = [ 8836.8397, 8839.9824, 8891.3477, 9000.3018, 9114.5049] +25-08-28 11:28:34 | D | - best error = [ 8657.3474, 8657.3474, 8657.3474, 8657.3474, 8657.3474] +25-08-28 11:28:34 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:28:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:28:34 | D | - sum error = [ 9165.0519, 9293.9722, 9453.1963, 9604.2207, 9699.7146] +25-08-28 11:28:34 | D | - best error = [ 8657.3474, 8657.3474, 8657.3474, 8657.3474, 8657.3474] +25-08-28 11:28:34 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:28:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:28:34 | D | - sum error = [ 9830.5158, 10001.4259, 10189.1966, 10326.5719, 10521.3429] +25-08-28 11:28:34 | D | - best error = [ 8657.3474, 8657.3474, 8657.3474, 8657.3474, 8657.3474] +25-08-28 11:28:34 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:28:34 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:28:34 | D | - sum error = [ 9692.7173, 9538.4829, 9381.0652, 9229.0142, 9158.7894] +25-08-28 11:28:34 | D | - best error = [ 8657.3474, 8657.3474, 8657.3474, 8657.3474, 8657.3474] +25-08-28 11:28:34 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:28:34 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:28:34 | D | - sum error = [ 9091.7272, 9064.9523, 9060.0499, 9110.3572, 9124.5147] +25-08-28 11:28:34 | D | - best error = [ 8657.3474, 8657.3474, 8657.3474, 8657.3474, 8657.3474] +25-08-28 11:28:34 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:28:34 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:28:34 | D | - sum error = [ 9177.4049, 9249.4359, 9330.1648, 9468.8027, 9575.8091] +25-08-28 11:28:34 | D | - best error = [ 8657.3474, 8657.3474, 8657.3474, 8657.3474, 8657.3474] +25-08-28 11:28:34 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:28:34 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:28:34 | D | - sum error = [ 9827.9162, 9996.9315, 10102.8566, 10435.0586] +25-08-28 11:28:34 | D | - best error = [ 8657.3474, 8657.3474, 8657.3474, 8657.3474] +25-08-28 11:28:34 | D | + error = 8657.3474 +25-08-28 11:28:34 | D | + scale = [min=1.0000, max=1.0000] +25-08-28 11:28:52 | D | - Smoothing Diffusion Block single_transformer_blocks.16 +25-08-28 11:28:52 | D | - Skipping Module single_transformer_blocks.16.norm.linear +25-08-28 11:28:52 | D | - Smoothing Transformer Block single_transformer_blocks.16 +25-08-28 11:28:52 | D | - single_transformer_blocks.16.attn.qkv_proj + single_transformer_blocks.16.up_proj +25-08-28 11:28:52 | D | + w: sint4 +25-08-28 11:28:52 | D | + x: sint4 +25-08-28 11:28:52 | D | + y: None +25-08-28 11:28:52 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:28:52 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:28:53 | D | + x - AbsMax +25-08-28 11:28:53 | D | + x = [min=0.4082, max=25.6250] +25-08-28 11:28:53 | D | + w - AbsMax +25-08-28 11:28:53 | D | + w = [min=0.0938, max=1.1797] +25-08-28 11:28:53 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 11:28:54 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 11:30:59 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:30:59 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:30:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:30:59 | D | - sum error = [76042.8039, 73894.5376, 70784.0940, 68967.0233, 67175.2373] +25-08-28 11:30:59 | D | - best error = [76042.8039, 73894.5376, 70784.0940, 68967.0233, 67175.2373] +25-08-28 11:30:59 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:30:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:30:59 | D | - sum error = [66409.8730, 64780.0186, 63890.0716, 62636.5159, 62006.6742] +25-08-28 11:30:59 | D | - best error = [66409.8730, 64780.0186, 63890.0716, 62636.5159, 62006.6742] +25-08-28 11:30:59 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:30:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:30:59 | D | - sum error = [60954.6706, 59908.7541, 60232.7114, 59324.8953, 59002.4553] +25-08-28 11:30:59 | D | - best error = [60954.6706, 59908.7541, 59908.7541, 59324.8953, 59002.4553] +25-08-28 11:30:59 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:30:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:30:59 | D | - sum error = [59651.6072, 59124.3798, 59349.1246, 58998.4699, 58738.5693] +25-08-28 11:30:59 | D | - best error = [59002.4553, 59002.4553, 59002.4553, 58998.4699, 58738.5693] +25-08-28 11:30:59 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:30:59 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:30:59 | D | - sum error = [115472.7748, 107487.6428, 101257.8251, 96011.3220, 91511.2891] +25-08-28 11:30:59 | D | - best error = [58738.5693, 58738.5693, 58738.5693, 58738.5693, 58738.5693] +25-08-28 11:30:59 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:30:59 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:30:59 | D | - sum error = [87599.3615, 82872.2066, 79409.5974, 75745.3925, 72772.8195] +25-08-28 11:30:59 | D | - best error = [58738.5693, 58738.5693, 58738.5693, 58738.5693, 58738.5693] +25-08-28 11:30:59 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:30:59 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:30:59 | D | - sum error = [70076.3981, 66799.6340, 65205.6400, 63075.6102, 61612.8194] +25-08-28 11:30:59 | D | - best error = [58738.5693, 58738.5693, 58738.5693, 58738.5693, 58738.5693] +25-08-28 11:30:59 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:30:59 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:30:59 | D | - sum error = [60879.8297, 59864.0508, 60551.7437, 59317.1425] +25-08-28 11:30:59 | D | - best error = [58738.5693, 58738.5693, 58738.5693, 58738.5693] +25-08-28 11:30:59 | D | + error = 58738.5693 +25-08-28 11:30:59 | D | + scale = [min=0.4269, max=21.7887] +25-08-28 11:30:59 | D | - single_transformer_blocks.16.attn.out_proj +25-08-28 11:30:59 | D | + w: sint4 +25-08-28 11:30:59 | D | + x: sint4 +25-08-28 11:30:59 | D | + y: None +25-08-28 11:30:59 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:30:59 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:31:00 | D | + x - AbsMax +25-08-28 11:31:00 | D | + x = [min=0.8984, max=12.8125] +25-08-28 11:31:00 | D | + w - AbsMax +25-08-28 11:31:00 | D | + w = [min=0.1133, max=0.4023] +25-08-28 11:31:00 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 11:31:00 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 11:31:58 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:31:58 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:31:58 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:31:58 | D | - sum error = [ 6523.3872, 6492.4245, 6474.8461, 6460.7690, 6462.8310] +25-08-28 11:31:58 | D | - best error = [ 6523.3872, 6492.4245, 6474.8461, 6460.7690, 6460.7690] +25-08-28 11:31:58 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:31:58 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:31:58 | D | - sum error = [ 6459.9306, 6468.9526, 6486.5455, 6492.6312, 6518.8134] +25-08-28 11:31:58 | D | - best error = [ 6459.9306, 6459.9306, 6459.9306, 6459.9306, 6459.9306] +25-08-28 11:31:58 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:31:58 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:31:58 | D | - sum error = [ 6541.9603, 6565.3356, 6598.0339, 6657.0720, 6697.8794] +25-08-28 11:31:58 | D | - best error = [ 6459.9306, 6459.9306, 6459.9306, 6459.9306, 6459.9306] +25-08-28 11:31:58 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:31:58 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:31:58 | D | - sum error = [ 6747.4914, 6807.5840, 6871.4831, 6950.6319, 7017.0071] +25-08-28 11:31:58 | D | - best error = [ 6459.9306, 6459.9306, 6459.9306, 6459.9306, 6459.9306] +25-08-28 11:31:58 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:31:58 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:31:58 | D | - sum error = [ 6882.7422, 6828.3793, 6764.4929, 6727.0725, 6691.6754] +25-08-28 11:31:58 | D | - best error = [ 6459.9306, 6459.9306, 6459.9306, 6459.9306, 6459.9306] +25-08-28 11:31:58 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:31:58 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:31:58 | D | - sum error = [ 6663.8535, 6638.8541, 6621.6481, 6640.6042, 6634.8498] +25-08-28 11:31:58 | D | - best error = [ 6459.9306, 6459.9306, 6459.9306, 6459.9306, 6459.9306] +25-08-28 11:31:58 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:31:58 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:31:58 | D | - sum error = [ 6648.7536, 6660.6630, 6704.8916, 6739.3958, 6766.1076] +25-08-28 11:31:58 | D | - best error = [ 6459.9306, 6459.9306, 6459.9306, 6459.9306, 6459.9306] +25-08-28 11:31:58 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:31:58 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:31:58 | D | - sum error = [ 6817.1760, 6868.4694, 6943.3433, 7021.5144] +25-08-28 11:31:58 | D | - best error = [ 6459.9306, 6459.9306, 6459.9306, 6459.9306] +25-08-28 11:31:58 | D | + error = 6459.9306 +25-08-28 11:31:58 | D | + scale = [min=0.9736, max=1.8919] +25-08-28 11:31:58 | D | - single_transformer_blocks.16.down_proj +25-08-28 11:31:58 | D | + w: sint4 +25-08-28 11:31:58 | D | + x: uint4 +25-08-28 11:31:58 | D | + y: None +25-08-28 11:31:58 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:31:58 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:32:00 | D | + x - AbsMax +25-08-28 11:32:00 | D | + x = [min=1.1250, max=15.5000] +25-08-28 11:32:00 | D | + w - AbsMax +25-08-28 11:32:00 | D | + w = [min=0.1133, max=1.1406] +25-08-28 11:32:00 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 11:32:03 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 11:34:12 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:34:12 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:34:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:34:12 | D | - sum error = [ 8147.1644, 8174.5280, 8173.3917, 8201.1014, 8259.9961] +25-08-28 11:34:12 | D | - best error = [ 8147.1644, 8147.1644, 8147.1644, 8147.1644, 8147.1644] +25-08-28 11:34:12 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:34:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:34:12 | D | - sum error = [ 8284.8648, 8357.4092, 8398.9252, 8445.3343, 8522.0101] +25-08-28 11:34:12 | D | - best error = [ 8147.1644, 8147.1644, 8147.1644, 8147.1644, 8147.1644] +25-08-28 11:34:12 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:34:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:34:12 | D | - sum error = [ 8637.6284, 8721.5201, 8835.5265, 8954.6332, 9044.6603] +25-08-28 11:34:12 | D | - best error = [ 8147.1644, 8147.1644, 8147.1644, 8147.1644, 8147.1644] +25-08-28 11:34:12 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:34:12 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:34:12 | D | - sum error = [ 9178.9590, 9343.1935, 9509.1560, 9659.3109, 9784.4060] +25-08-28 11:34:12 | D | - best error = [ 8147.1644, 8147.1644, 8147.1644, 8147.1644, 8147.1644] +25-08-28 11:34:12 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:34:12 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:34:12 | D | - sum error = [ 9331.0162, 9094.9093, 8977.2548, 8865.7449, 8811.2946] +25-08-28 11:34:12 | D | - best error = [ 8147.1644, 8147.1644, 8147.1644, 8147.1644, 8147.1644] +25-08-28 11:34:12 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:34:12 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:34:12 | D | - sum error = [ 8713.1423, 8673.6087, 8676.1886, 8619.9963, 8620.4608] +25-08-28 11:34:12 | D | - best error = [ 8147.1644, 8147.1644, 8147.1644, 8147.1644, 8147.1644] +25-08-28 11:34:12 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:34:12 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:34:12 | D | - sum error = [ 8652.9791, 8652.6005, 8762.8772, 8879.1432, 8936.6750] +25-08-28 11:34:12 | D | - best error = [ 8147.1644, 8147.1644, 8147.1644, 8147.1644, 8147.1644] +25-08-28 11:34:12 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:34:12 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:34:12 | D | - sum error = [ 9194.3452, 9378.1141, 9543.9765, 9709.4070] +25-08-28 11:34:12 | D | - best error = [ 8147.1644, 8147.1644, 8147.1644, 8147.1644] +25-08-28 11:34:12 | D | + error = 8147.1644 +25-08-28 11:34:12 | D | + scale = [min=1.0000, max=1.0000] +25-08-28 11:34:32 | D | - Smoothing Diffusion Block single_transformer_blocks.17 +25-08-28 11:34:32 | D | - Skipping Module single_transformer_blocks.17.norm.linear +25-08-28 11:34:32 | D | - Smoothing Transformer Block single_transformer_blocks.17 +25-08-28 11:34:32 | D | - single_transformer_blocks.17.attn.qkv_proj + single_transformer_blocks.17.up_proj +25-08-28 11:34:32 | D | + w: sint4 +25-08-28 11:34:32 | D | + x: sint4 +25-08-28 11:34:32 | D | + y: None +25-08-28 11:34:32 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:34:32 | D | + finished parsing calibration arguments, ram usage: 17.8 +25-08-28 11:34:33 | D | + x - AbsMax +25-08-28 11:34:33 | D | + x = [min=0.3320, max=22.2500] +25-08-28 11:34:33 | D | + w - AbsMax +25-08-28 11:34:33 | D | + w = [min=0.1367, max=0.8164] +25-08-28 11:34:33 | D | + finished resetting calibrator, ram usage: 17.9 +25-08-28 11:34:34 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 11:36:42 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:36:42 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:36:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:36:42 | D | - sum error = [74722.9156, 72846.7612, 71142.6070, 69528.1873, 68407.2361] +25-08-28 11:36:42 | D | - best error = [74722.9156, 72846.7612, 71142.6070, 69528.1873, 68407.2361] +25-08-28 11:36:42 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:36:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:36:42 | D | - sum error = [66987.7028, 65802.7812, 64948.0265, 64115.6475, 63518.1158] +25-08-28 11:36:42 | D | - best error = [66987.7028, 65802.7812, 64948.0265, 64115.6475, 63518.1158] +25-08-28 11:36:42 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:36:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:36:42 | D | - sum error = [63288.3737, 62695.2911, 62629.8802, 62729.2442, 63309.9641] +25-08-28 11:36:42 | D | - best error = [63288.3737, 62695.2911, 62629.8802, 62629.8802, 62629.8802] +25-08-28 11:36:42 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:36:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:36:42 | D | - sum error = [63498.1685, 62966.6070, 63879.6388, 63748.9516, 64776.0588] +25-08-28 11:36:42 | D | - best error = [62629.8802, 62629.8802, 62629.8802, 62629.8802, 62629.8802] +25-08-28 11:36:42 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:36:42 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:36:42 | D | - sum error = [141865.7195, 132325.7537, 120958.5481, 112198.3843, 104451.7744] +25-08-28 11:36:42 | D | - best error = [62629.8802, 62629.8802, 62629.8802, 62629.8802, 62629.8802] +25-08-28 11:36:42 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:36:42 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:36:42 | D | - sum error = [98123.9184, 91466.0092, 85967.8020, 80452.3404, 76669.6026] +25-08-28 11:36:42 | D | - best error = [62629.8802, 62629.8802, 62629.8802, 62629.8802, 62629.8802] +25-08-28 11:36:42 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:36:42 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:36:42 | D | - sum error = [72917.0365, 69803.6194, 67462.9408, 65558.3634, 64546.8854] +25-08-28 11:36:42 | D | - best error = [62629.8802, 62629.8802, 62629.8802, 62629.8802, 62629.8802] +25-08-28 11:36:42 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:36:42 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:36:42 | D | - sum error = [63568.5860, 63984.6762, 64448.7666, 64421.3152] +25-08-28 11:36:42 | D | - best error = [62629.8802, 62629.8802, 62629.8802, 62629.8802] +25-08-28 11:36:42 | D | + error = 62629.8802 +25-08-28 11:36:42 | D | + scale = [min=0.5161, max=6.4328] +25-08-28 11:36:42 | D | - single_transformer_blocks.17.attn.out_proj +25-08-28 11:36:42 | D | + w: sint4 +25-08-28 11:36:42 | D | + x: sint4 +25-08-28 11:36:42 | D | + y: None +25-08-28 11:36:42 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:36:42 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:36:43 | D | + x - AbsMax +25-08-28 11:36:43 | D | + x = [min=1.2031, max=9.3125] +25-08-28 11:36:43 | D | + w - AbsMax +25-08-28 11:36:43 | D | + w = [min=0.1216, max=0.6016] +25-08-28 11:36:43 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 11:36:44 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 11:37:45 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:37:45 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:37:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:37:45 | D | - sum error = [ 7645.3460, 7611.6232, 7585.9913, 7560.2624, 7558.4033] +25-08-28 11:37:45 | D | - best error = [ 7645.3460, 7611.6232, 7585.9913, 7560.2624, 7558.4033] +25-08-28 11:37:45 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:37:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:37:45 | D | - sum error = [ 7545.8510, 7535.5685, 7530.0372, 7526.7706, 7533.3541] +25-08-28 11:37:45 | D | - best error = [ 7545.8510, 7535.5685, 7530.0372, 7526.7706, 7526.7706] +25-08-28 11:37:45 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:37:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:37:45 | D | - sum error = [ 7555.6350, 7571.0163, 7596.3999, 7622.1879, 7653.1792] +25-08-28 11:37:45 | D | - best error = [ 7526.7706, 7526.7706, 7526.7706, 7526.7706, 7526.7706] +25-08-28 11:37:45 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:37:45 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:37:45 | D | - sum error = [ 7682.1419, 7734.7838, 7779.8571, 7828.5074, 7893.4948] +25-08-28 11:37:45 | D | - best error = [ 7526.7706, 7526.7706, 7526.7706, 7526.7706, 7526.7706] +25-08-28 11:37:45 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:37:45 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:37:45 | D | - sum error = [ 8259.5680, 8147.3893, 8052.3547, 7974.0120, 7898.9906] +25-08-28 11:37:45 | D | - best error = [ 7526.7706, 7526.7706, 7526.7706, 7526.7706, 7526.7706] +25-08-28 11:37:45 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:37:45 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:37:45 | D | - sum error = [ 7845.2977, 7798.7025, 7749.7172, 7717.3822, 7702.9838] +25-08-28 11:37:45 | D | - best error = [ 7526.7706, 7526.7706, 7526.7706, 7526.7706, 7526.7706] +25-08-28 11:37:45 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:37:45 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:37:45 | D | - sum error = [ 7683.8168, 7673.2176, 7668.1841, 7684.4183, 7707.6295] +25-08-28 11:37:45 | D | - best error = [ 7526.7706, 7526.7706, 7526.7706, 7526.7706, 7526.7706] +25-08-28 11:37:45 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:37:45 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:37:45 | D | - sum error = [ 7741.0935, 7781.2012, 7830.2391, 7887.6856] +25-08-28 11:37:45 | D | - best error = [ 7526.7706, 7526.7706, 7526.7706, 7526.7706] +25-08-28 11:37:45 | D | + error = 7526.7706 +25-08-28 11:37:45 | D | + scale = [min=1.0768, max=2.4413] +25-08-28 11:37:45 | D | - single_transformer_blocks.17.down_proj +25-08-28 11:37:45 | D | + w: sint4 +25-08-28 11:37:45 | D | + x: uint4 +25-08-28 11:37:45 | D | + y: None +25-08-28 11:37:45 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:37:45 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:37:48 | D | + x - AbsMax +25-08-28 11:37:48 | D | + x = [min=1.0547, max=25.3750] +25-08-28 11:37:48 | D | + w - AbsMax +25-08-28 11:37:48 | D | + w = [min=0.1118, max=1.1406] +25-08-28 11:37:48 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 11:37:50 | D | + finished calculating the original outputs, ram usage: 19.2 +25-08-28 11:39:50 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:39:50 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:39:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:39:50 | D | - sum error = [ 8361.1650, 8392.0987, 8433.8414, 8449.4061, 8503.5724] +25-08-28 11:39:50 | D | - best error = [ 8361.1650, 8361.1650, 8361.1650, 8361.1650, 8361.1650] +25-08-28 11:39:50 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:39:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:39:50 | D | - sum error = [ 8523.7419, 8623.3492, 8726.0882, 8789.8833, 8912.4416] +25-08-28 11:39:50 | D | - best error = [ 8361.1650, 8361.1650, 8361.1650, 8361.1650, 8361.1650] +25-08-28 11:39:50 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:39:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:39:50 | D | - sum error = [ 8992.4439, 9110.4129, 9258.1908, 9355.3681, 9524.6694] +25-08-28 11:39:50 | D | - best error = [ 8361.1650, 8361.1650, 8361.1650, 8361.1650, 8361.1650] +25-08-28 11:39:50 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:39:50 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:39:50 | D | - sum error = [ 9710.4340, 9955.7324, 10153.3600, 10406.5422, 10547.2578] +25-08-28 11:39:50 | D | - best error = [ 8361.1650, 8361.1650, 8361.1650, 8361.1650, 8361.1650] +25-08-28 11:39:50 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:39:50 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:39:50 | D | - sum error = [10056.9435, 9786.9064, 9493.7571, 9320.2266, 9182.5831] +25-08-28 11:39:50 | D | - best error = [ 8361.1650, 8361.1650, 8361.1650, 8361.1650, 8361.1650] +25-08-28 11:39:50 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:39:50 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:39:50 | D | - sum error = [ 9087.1368, 8955.0522, 8892.9239, 8914.2865, 8942.3576] +25-08-28 11:39:50 | D | - best error = [ 8361.1650, 8361.1650, 8361.1650, 8361.1650, 8361.1650] +25-08-28 11:39:50 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:39:50 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:39:50 | D | - sum error = [ 8984.3954, 9085.4697, 9181.5480, 9370.0433, 9462.5658] +25-08-28 11:39:50 | D | - best error = [ 8361.1650, 8361.1650, 8361.1650, 8361.1650, 8361.1650] +25-08-28 11:39:50 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:39:50 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:39:50 | D | - sum error = [ 9640.4124, 9915.2251, 10229.1552, 10472.2872] +25-08-28 11:39:50 | D | - best error = [ 8361.1650, 8361.1650, 8361.1650, 8361.1650] +25-08-28 11:39:50 | D | + error = 8361.1650 +25-08-28 11:39:50 | D | + scale = [min=1.0000, max=1.0000] +25-08-28 11:40:09 | D | - Smoothing Diffusion Block single_transformer_blocks.18 +25-08-28 11:40:09 | D | - Skipping Module single_transformer_blocks.18.norm.linear +25-08-28 11:40:09 | D | - Smoothing Transformer Block single_transformer_blocks.18 +25-08-28 11:40:09 | D | - single_transformer_blocks.18.attn.qkv_proj + single_transformer_blocks.18.up_proj +25-08-28 11:40:09 | D | + w: sint4 +25-08-28 11:40:09 | D | + x: sint4 +25-08-28 11:40:09 | D | + y: None +25-08-28 11:40:09 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:40:09 | D | + finished parsing calibration arguments, ram usage: 17.2 +25-08-28 11:40:10 | D | + x - AbsMax +25-08-28 11:40:10 | D | + x = [min=0.1289, max=30.5000] +25-08-28 11:40:10 | D | + w - AbsMax +25-08-28 11:40:10 | D | + w = [min=0.1055, max=1.1250] +25-08-28 11:40:10 | D | + finished resetting calibrator, ram usage: 17.3 +25-08-28 11:40:11 | D | + finished calculating the original outputs, ram usage: 17.5 +25-08-28 11:42:21 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:42:21 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:42:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:42:21 | D | - sum error = [70594.0163, 68579.5650, 66736.1132, 65178.5683, 64097.2960] +25-08-28 11:42:21 | D | - best error = [70594.0163, 68579.5650, 66736.1132, 65178.5683, 64097.2960] +25-08-28 11:42:21 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:42:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:42:21 | D | - sum error = [63245.6200, 61881.4309, 60629.3521, 60160.9149, 59370.5478] +25-08-28 11:42:21 | D | - best error = [63245.6200, 61881.4309, 60629.3521, 60160.9149, 59370.5478] +25-08-28 11:42:21 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:42:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:42:21 | D | - sum error = [58619.2050, 57884.9375, 57975.8692, 57973.6989, 58917.8856] +25-08-28 11:42:21 | D | - best error = [58619.2050, 57884.9375, 57884.9375, 57884.9375, 57884.9375] +25-08-28 11:42:21 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:42:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:42:21 | D | - sum error = [57680.4660, 57400.4053, 57640.5613, 57757.8061, 57697.9412] +25-08-28 11:42:21 | D | - best error = [57680.4660, 57400.4053, 57400.4053, 57400.4053, 57400.4053] +25-08-28 11:42:21 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:42:21 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:42:21 | D | - sum error = [101328.0745, 96934.3702, 91301.7902, 87524.7531, 82926.3407] +25-08-28 11:42:21 | D | - best error = [57400.4053, 57400.4053, 57400.4053, 57400.4053, 57400.4053] +25-08-28 11:42:21 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:42:21 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:42:21 | D | - sum error = [78827.1141, 75537.5810, 72812.0535, 69342.8962, 66124.4403] +25-08-28 11:42:21 | D | - best error = [57400.4053, 57400.4053, 57400.4053, 57400.4053, 57400.4053] +25-08-28 11:42:21 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:42:21 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:42:21 | D | - sum error = [64571.2129, 62536.8428, 61386.8854, 59825.6732, 58900.6616] +25-08-28 11:42:21 | D | - best error = [57400.4053, 57400.4053, 57400.4053, 57400.4053, 57400.4053] +25-08-28 11:42:21 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:42:21 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:42:21 | D | - sum error = [58707.5235, 58564.1835, 58253.2600, 57866.9237] +25-08-28 11:42:21 | D | - best error = [57400.4053, 57400.4053, 57400.4053, 57400.4053] +25-08-28 11:42:21 | D | + error = 57400.4053 +25-08-28 11:42:21 | D | + scale = [min=0.1942, max=15.3971] +25-08-28 11:42:21 | D | - single_transformer_blocks.18.attn.out_proj +25-08-28 11:42:21 | D | + w: sint4 +25-08-28 11:42:21 | D | + x: sint4 +25-08-28 11:42:21 | D | + y: None +25-08-28 11:42:21 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:42:21 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:42:21 | D | + x - AbsMax +25-08-28 11:42:21 | D | + x = [min=0.9922, max=11.1875] +25-08-28 11:42:21 | D | + w - AbsMax +25-08-28 11:42:21 | D | + w = [min=0.1123, max=0.4375] +25-08-28 11:42:21 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 11:42:22 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 11:43:24 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:43:24 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:43:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:43:24 | D | - sum error = [ 6014.1683, 6004.9250, 5986.2593, 5967.5988, 5960.8811] +25-08-28 11:43:24 | D | - best error = [ 6014.1683, 6004.9250, 5986.2593, 5967.5988, 5960.8811] +25-08-28 11:43:24 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:43:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:43:24 | D | - sum error = [ 5957.2071, 5952.4845, 5957.3571, 5949.8612, 5989.8230] +25-08-28 11:43:24 | D | - best error = [ 5957.2071, 5952.4845, 5952.4845, 5949.8612, 5949.8612] +25-08-28 11:43:24 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:43:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:43:24 | D | - sum error = [ 6000.9143, 6024.1414, 6043.0727, 6076.7343, 6094.4066] +25-08-28 11:43:24 | D | - best error = [ 5949.8612, 5949.8612, 5949.8612, 5949.8612, 5949.8612] +25-08-28 11:43:24 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:43:24 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:43:24 | D | - sum error = [ 6151.8004, 6192.7868, 6225.1721, 6282.8334, 6340.3754] +25-08-28 11:43:24 | D | - best error = [ 5949.8612, 5949.8612, 5949.8612, 5949.8612, 5949.8612] +25-08-28 11:43:24 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:43:24 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:43:24 | D | - sum error = [ 6420.5492, 6361.5212, 6305.4031, 6257.3848, 6216.0798] +25-08-28 11:43:24 | D | - best error = [ 5949.8612, 5949.8612, 5949.8612, 5949.8612, 5949.8612] +25-08-28 11:43:24 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:43:24 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:43:24 | D | - sum error = [ 6176.9960, 6157.9272, 6141.3269, 6117.9526, 6115.8123] +25-08-28 11:43:24 | D | - best error = [ 5949.8612, 5949.8612, 5949.8612, 5949.8612, 5949.8612] +25-08-28 11:43:24 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:43:24 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:43:24 | D | - sum error = [ 6115.6874, 6105.4091, 6126.0444, 6147.1294, 6175.8931] +25-08-28 11:43:24 | D | - best error = [ 5949.8612, 5949.8612, 5949.8612, 5949.8612, 5949.8612] +25-08-28 11:43:24 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:43:24 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:43:24 | D | - sum error = [ 6210.8624, 6241.8722, 6297.7523, 6349.9285] +25-08-28 11:43:24 | D | - best error = [ 5949.8612, 5949.8612, 5949.8612, 5949.8612] +25-08-28 11:43:24 | D | + error = 5949.8612 +25-08-28 11:43:24 | D | + scale = [min=0.9969, max=2.6272] +25-08-28 11:43:24 | D | - single_transformer_blocks.18.down_proj +25-08-28 11:43:24 | D | + w: sint4 +25-08-28 11:43:24 | D | + x: uint4 +25-08-28 11:43:24 | D | + y: None +25-08-28 11:43:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:43:24 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:43:28 | D | + x - AbsMax +25-08-28 11:43:28 | D | + x = [min=1.0000, max=17.5000] +25-08-28 11:43:28 | D | + w - AbsMax +25-08-28 11:43:28 | D | + w = [min=0.1113, max=1.0938] +25-08-28 11:43:28 | D | + finished resetting calibrator, ram usage: 18.3 +25-08-28 11:43:30 | D | + finished calculating the original outputs, ram usage: 18.3 +25-08-28 11:45:26 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:45:26 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:45:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:45:26 | D | - sum error = [ 7734.8254, 7750.8317, 7774.8798, 7823.5877, 7837.6673] +25-08-28 11:45:26 | D | - best error = [ 7734.8254, 7734.8254, 7734.8254, 7734.8254, 7734.8254] +25-08-28 11:45:26 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:45:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:45:26 | D | - sum error = [ 7892.4346, 7923.1022, 8008.4555, 8087.4119, 8158.4043] +25-08-28 11:45:26 | D | - best error = [ 7734.8254, 7734.8254, 7734.8254, 7734.8254, 7734.8254] +25-08-28 11:45:26 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:45:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:45:26 | D | - sum error = [ 8259.1249, 8388.6657, 8491.3033, 8568.2194, 8694.4125] +25-08-28 11:45:26 | D | - best error = [ 7734.8254, 7734.8254, 7734.8254, 7734.8254, 7734.8254] +25-08-28 11:45:26 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:45:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:45:26 | D | - sum error = [ 8857.4353, 8983.4773, 9129.2729, 9275.2583, 9457.3341] +25-08-28 11:45:26 | D | - best error = [ 7734.8254, 7734.8254, 7734.8254, 7734.8254, 7734.8254] +25-08-28 11:45:26 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:45:26 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:45:26 | D | - sum error = [ 8896.1479, 8731.9634, 8587.3675, 8504.0127, 8421.3400] +25-08-28 11:45:26 | D | - best error = [ 7734.8254, 7734.8254, 7734.8254, 7734.8254, 7734.8254] +25-08-28 11:45:26 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:45:26 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:45:26 | D | - sum error = [ 8323.6482, 8253.2529, 8234.0274, 8274.8767, 8275.8270] +25-08-28 11:45:26 | D | - best error = [ 7734.8254, 7734.8254, 7734.8254, 7734.8254, 7734.8254] +25-08-28 11:45:26 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:45:26 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:45:26 | D | - sum error = [ 8343.3269, 8394.0012, 8490.1241, 8597.8816, 8699.7319] +25-08-28 11:45:26 | D | - best error = [ 7734.8254, 7734.8254, 7734.8254, 7734.8254, 7734.8254] +25-08-28 11:45:26 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:45:26 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:45:26 | D | - sum error = [ 8843.0474, 9012.4017, 9142.6718, 9368.7895] +25-08-28 11:45:26 | D | - best error = [ 7734.8254, 7734.8254, 7734.8254, 7734.8254] +25-08-28 11:45:26 | D | + error = 7734.8254 +25-08-28 11:45:26 | D | + scale = [min=1.0000, max=1.0000] +25-08-28 11:45:46 | D | - Smoothing Diffusion Block single_transformer_blocks.19 +25-08-28 11:45:46 | D | - Skipping Module single_transformer_blocks.19.norm.linear +25-08-28 11:45:46 | D | - Smoothing Transformer Block single_transformer_blocks.19 +25-08-28 11:45:46 | D | - single_transformer_blocks.19.attn.qkv_proj + single_transformer_blocks.19.up_proj +25-08-28 11:45:46 | D | + w: sint4 +25-08-28 11:45:46 | D | + x: sint4 +25-08-28 11:45:46 | D | + y: None +25-08-28 11:45:46 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:45:46 | D | + finished parsing calibration arguments, ram usage: 17.4 +25-08-28 11:45:46 | D | + x - AbsMax +25-08-28 11:45:46 | D | + x = [min=0.2217, max=17.3750] +25-08-28 11:45:46 | D | + w - AbsMax +25-08-28 11:45:46 | D | + w = [min=0.0679, max=0.8672] +25-08-28 11:45:46 | D | + finished resetting calibrator, ram usage: 17.5 +25-08-28 11:45:48 | D | + finished calculating the original outputs, ram usage: 17.8 +25-08-28 11:47:54 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:47:54 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:47:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:47:54 | D | - sum error = [72200.5811, 70530.2649, 68025.2490, 66742.5036, 65411.4109] +25-08-28 11:47:54 | D | - best error = [72200.5811, 70530.2649, 68025.2490, 66742.5036, 65411.4109] +25-08-28 11:47:54 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:47:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:47:54 | D | - sum error = [63484.8507, 61333.2802, 59970.4332, 58799.0443, 57855.1084] +25-08-28 11:47:54 | D | - best error = [63484.8507, 61333.2802, 59970.4332, 58799.0443, 57855.1084] +25-08-28 11:47:54 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:47:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:47:54 | D | - sum error = [57072.1144, 56482.8600, 56258.3890, 55792.5621, 55586.7173] +25-08-28 11:47:54 | D | - best error = [57072.1144, 56482.8600, 56258.3890, 55792.5621, 55586.7173] +25-08-28 11:47:54 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:47:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:47:54 | D | - sum error = [55450.9473, 55575.2647, 55182.4714, 55231.6083, 54862.8895] +25-08-28 11:47:54 | D | - best error = [55450.9473, 55450.9473, 55182.4714, 55182.4714, 54862.8895] +25-08-28 11:47:54 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:47:54 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:47:54 | D | - sum error = [111118.0774, 103206.8860, 94724.6864, 89865.1363, 85317.2031] +25-08-28 11:47:54 | D | - best error = [54862.8895, 54862.8895, 54862.8895, 54862.8895, 54862.8895] +25-08-28 11:47:54 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:47:54 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:47:54 | D | - sum error = [80730.2891, 77028.9687, 73349.7380, 69842.2598, 66507.5101] +25-08-28 11:47:54 | D | - best error = [54862.8895, 54862.8895, 54862.8895, 54862.8895, 54862.8895] +25-08-28 11:47:54 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:47:54 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:47:54 | D | - sum error = [64066.4695, 61741.8004, 59835.5720, 58428.5475, 57340.1784] +25-08-28 11:47:54 | D | - best error = [54862.8895, 54862.8895, 54862.8895, 54862.8895, 54862.8895] +25-08-28 11:47:54 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:47:54 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:47:54 | D | - sum error = [56283.3403, 55796.7879, 55571.8421, 54986.1248] +25-08-28 11:47:54 | D | - best error = [54862.8895, 54862.8895, 54862.8895, 54862.8895] +25-08-28 11:47:54 | D | + error = 54862.8895 +25-08-28 11:47:54 | D | + scale = [min=0.2390, max=15.0636] +25-08-28 11:47:55 | D | - single_transformer_blocks.19.attn.out_proj +25-08-28 11:47:55 | D | + w: sint4 +25-08-28 11:47:55 | D | + x: sint4 +25-08-28 11:47:55 | D | + y: None +25-08-28 11:47:55 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:47:55 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:47:55 | D | + x - AbsMax +25-08-28 11:47:55 | D | + x = [min=1.0469, max=7.3125] +25-08-28 11:47:55 | D | + w - AbsMax +25-08-28 11:47:55 | D | + w = [min=0.1196, max=0.4121] +25-08-28 11:47:55 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 11:47:56 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 11:48:54 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:48:54 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:48:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:48:54 | D | - sum error = [ 5183.2827, 5156.7100, 5149.0092, 5148.5744, 5142.3483] +25-08-28 11:48:54 | D | - best error = [ 5183.2827, 5156.7100, 5149.0092, 5148.5744, 5142.3483] +25-08-28 11:48:54 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:48:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:48:54 | D | - sum error = [ 5136.7581, 5134.5755, 5141.8749, 5139.8300, 5146.8804] +25-08-28 11:48:54 | D | - best error = [ 5136.7581, 5134.5755, 5134.5755, 5134.5755, 5134.5755] +25-08-28 11:48:54 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:48:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:48:54 | D | - sum error = [ 5163.7413, 5177.9351, 5201.4148, 5219.6950, 5242.0960] +25-08-28 11:48:54 | D | - best error = [ 5134.5755, 5134.5755, 5134.5755, 5134.5755, 5134.5755] +25-08-28 11:48:54 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:48:54 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:48:54 | D | - sum error = [ 5278.4778, 5309.2764, 5345.9299, 5383.0211, 5418.5688] +25-08-28 11:48:54 | D | - best error = [ 5134.5755, 5134.5755, 5134.5755, 5134.5755, 5134.5755] +25-08-28 11:48:54 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:48:54 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:48:54 | D | - sum error = [ 5523.7402, 5463.6034, 5416.7490, 5371.7764, 5351.9191] +25-08-28 11:48:54 | D | - best error = [ 5134.5755, 5134.5755, 5134.5755, 5134.5755, 5134.5755] +25-08-28 11:48:54 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:48:54 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:48:54 | D | - sum error = [ 5324.4267, 5307.3393, 5287.9762, 5277.6250, 5276.7092] +25-08-28 11:48:54 | D | - best error = [ 5134.5755, 5134.5755, 5134.5755, 5134.5755, 5134.5755] +25-08-28 11:48:54 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:48:54 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:48:54 | D | - sum error = [ 5275.6194, 5268.6053, 5264.0916, 5279.2196, 5299.5917] +25-08-28 11:48:54 | D | - best error = [ 5134.5755, 5134.5755, 5134.5755, 5134.5755, 5134.5755] +25-08-28 11:48:54 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:48:54 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:48:54 | D | - sum error = [ 5327.7905, 5357.6624, 5392.3086, 5414.1117] +25-08-28 11:48:54 | D | - best error = [ 5134.5755, 5134.5755, 5134.5755, 5134.5755] +25-08-28 11:48:54 | D | + error = 5134.5755 +25-08-28 11:48:54 | D | + scale = [min=1.0138, max=1.8164] +25-08-28 11:48:54 | D | - single_transformer_blocks.19.down_proj +25-08-28 11:48:54 | D | + w: sint4 +25-08-28 11:48:54 | D | + x: uint4 +25-08-28 11:48:54 | D | + y: None +25-08-28 11:48:54 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:48:54 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:48:56 | D | + x - AbsMax +25-08-28 11:48:56 | D | + x = [min=1.0938, max=15.0000] +25-08-28 11:48:56 | D | + w - AbsMax +25-08-28 11:48:56 | D | + w = [min=0.1079, max=1.4297] +25-08-28 11:48:56 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 11:48:58 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 11:50:47 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:50:47 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:50:47 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:50:47 | D | - sum error = [ 7330.5020, 7330.2677, 7360.1946, 7424.9183, 7454.9042] +25-08-28 11:50:47 | D | - best error = [ 7330.5020, 7330.2677, 7330.2677, 7330.2677, 7330.2677] +25-08-28 11:50:47 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:50:47 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:50:47 | D | - sum error = [ 7491.7560, 7568.9043, 7662.5317, 7766.4414, 7815.5097] +25-08-28 11:50:47 | D | - best error = [ 7330.2677, 7330.2677, 7330.2677, 7330.2677, 7330.2677] +25-08-28 11:50:47 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:50:47 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:50:47 | D | - sum error = [ 7920.3263, 8049.5180, 8129.1980, 8225.7561, 8393.9597] +25-08-28 11:50:47 | D | - best error = [ 7330.2677, 7330.2677, 7330.2677, 7330.2677, 7330.2677] +25-08-28 11:50:47 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:50:47 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:50:47 | D | - sum error = [ 8531.8500, 8684.6623, 8806.7488, 9034.9924, 9199.6283] +25-08-28 11:50:47 | D | - best error = [ 7330.2677, 7330.2677, 7330.2677, 7330.2677, 7330.2677] +25-08-28 11:50:47 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:50:47 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:50:47 | D | - sum error = [ 8155.7337, 8002.5388, 7880.4797, 7805.9639, 7758.7484] +25-08-28 11:50:47 | D | - best error = [ 7330.2677, 7330.2677, 7330.2677, 7330.2677, 7330.2677] +25-08-28 11:50:47 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:50:47 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:50:47 | D | - sum error = [ 7716.1270, 7664.5279, 7654.4531, 7712.4180, 7733.2270] +25-08-28 11:50:47 | D | - best error = [ 7330.2677, 7330.2677, 7330.2677, 7330.2677, 7330.2677] +25-08-28 11:50:47 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:50:47 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:50:47 | D | - sum error = [ 7737.7798, 7836.1891, 7929.2418, 8108.6245, 8272.3893] +25-08-28 11:50:47 | D | - best error = [ 7330.2677, 7330.2677, 7330.2677, 7330.2677, 7330.2677] +25-08-28 11:50:47 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:50:47 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:50:47 | D | - sum error = [ 8500.9689, 8685.9496, 8877.9295, 9124.9631] +25-08-28 11:50:47 | D | - best error = [ 7330.2677, 7330.2677, 7330.2677, 7330.2677] +25-08-28 11:50:47 | D | + error = 7330.2677 +25-08-28 11:50:47 | D | + scale = [min=1.0045, max=1.1450] +25-08-28 11:51:05 | D | - Smoothing Diffusion Block single_transformer_blocks.20 +25-08-28 11:51:05 | D | - Skipping Module single_transformer_blocks.20.norm.linear +25-08-28 11:51:05 | D | - Smoothing Transformer Block single_transformer_blocks.20 +25-08-28 11:51:05 | D | - single_transformer_blocks.20.attn.qkv_proj + single_transformer_blocks.20.up_proj +25-08-28 11:51:05 | D | + w: sint4 +25-08-28 11:51:05 | D | + x: sint4 +25-08-28 11:51:05 | D | + y: None +25-08-28 11:51:05 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:51:05 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 11:51:05 | D | + x - AbsMax +25-08-28 11:51:05 | D | + x = [min=0.1533, max=19.3750] +25-08-28 11:51:05 | D | + w - AbsMax +25-08-28 11:51:05 | D | + w = [min=0.0947, max=0.8789] +25-08-28 11:51:05 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 11:51:07 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 11:53:13 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:53:13 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:53:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:53:13 | D | - sum error = [70517.3386, 69111.3143, 67636.2374, 66196.0086, 63958.1303] +25-08-28 11:53:13 | D | - best error = [70517.3386, 69111.3143, 67636.2374, 66196.0086, 63958.1303] +25-08-28 11:53:13 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:53:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:53:13 | D | - sum error = [63267.6984, 62339.4455, 61150.7629, 60305.7511, 59809.0808] +25-08-28 11:53:13 | D | - best error = [63267.6984, 62339.4455, 61150.7629, 60305.7511, 59809.0808] +25-08-28 11:53:13 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:53:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:53:13 | D | - sum error = [59030.3797, 58729.4379, 58261.5191, 58032.8690, 58333.7224] +25-08-28 11:53:13 | D | - best error = [59030.3797, 58729.4379, 58261.5191, 58032.8690, 58032.8690] +25-08-28 11:53:13 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:53:13 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:53:13 | D | - sum error = [58104.0061, 58045.3318, 58512.7066, 59015.1276, 59033.2376] +25-08-28 11:53:13 | D | - best error = [58032.8690, 58032.8690, 58032.8690, 58032.8690, 58032.8690] +25-08-28 11:53:13 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:53:13 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:53:13 | D | - sum error = [106651.8427, 101038.3823, 95003.0797, 89597.4514, 85290.0167] +25-08-28 11:53:13 | D | - best error = [58032.8690, 58032.8690, 58032.8690, 58032.8690, 58032.8690] +25-08-28 11:53:13 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:53:13 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:53:13 | D | - sum error = [81172.7103, 76824.1829, 73365.3372, 70900.9667, 67384.2503] +25-08-28 11:53:13 | D | - best error = [58032.8690, 58032.8690, 58032.8690, 58032.8690, 58032.8690] +25-08-28 11:53:13 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:53:13 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:53:13 | D | - sum error = [65765.6796, 63712.8157, 62027.4623, 60773.7312, 60012.3849] +25-08-28 11:53:13 | D | - best error = [58032.8690, 58032.8690, 58032.8690, 58032.8690, 58032.8690] +25-08-28 11:53:13 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:53:13 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:53:13 | D | - sum error = [59109.1649, 58544.0459, 58698.5160, 59105.1261] +25-08-28 11:53:13 | D | - best error = [58032.8690, 58032.8690, 58032.8690, 58032.8690] +25-08-28 11:53:13 | D | + error = 58032.8690 +25-08-28 11:53:13 | D | + scale = [min=0.2956, max=6.8661] +25-08-28 11:53:13 | D | - single_transformer_blocks.20.attn.out_proj +25-08-28 11:53:13 | D | + w: sint4 +25-08-28 11:53:13 | D | + x: sint4 +25-08-28 11:53:13 | D | + y: None +25-08-28 11:53:13 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:53:13 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 11:53:14 | D | + x - AbsMax +25-08-28 11:53:14 | D | + x = [min=0.8359, max=6.1562] +25-08-28 11:53:14 | D | + w - AbsMax +25-08-28 11:53:14 | D | + w = [min=0.1055, max=0.5586] +25-08-28 11:53:14 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 11:53:15 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 11:54:11 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:54:11 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:54:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:54:11 | D | - sum error = [ 5524.7677, 5514.5016, 5497.6106, 5486.6221, 5479.1976] +25-08-28 11:54:11 | D | - best error = [ 5524.7677, 5514.5016, 5497.6106, 5486.6221, 5479.1976] +25-08-28 11:54:11 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:54:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:54:11 | D | - sum error = [ 5473.9642, 5469.3402, 5467.1034, 5476.5179, 5491.5114] +25-08-28 11:54:11 | D | - best error = [ 5473.9642, 5469.3402, 5467.1034, 5467.1034, 5467.1034] +25-08-28 11:54:11 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:54:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:54:11 | D | - sum error = [ 5505.4547, 5506.5025, 5531.6909, 5532.5423, 5567.1195] +25-08-28 11:54:11 | D | - best error = [ 5467.1034, 5467.1034, 5467.1034, 5467.1034, 5467.1034] +25-08-28 11:54:11 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:54:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:54:11 | D | - sum error = [ 5598.5598, 5625.8717, 5651.0909, 5695.3007, 5721.4325] +25-08-28 11:54:11 | D | - best error = [ 5467.1034, 5467.1034, 5467.1034, 5467.1034, 5467.1034] +25-08-28 11:54:11 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:54:11 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:54:11 | D | - sum error = [ 6026.1759, 5954.0413, 5881.1587, 5815.4542, 5770.8746] +25-08-28 11:54:11 | D | - best error = [ 5467.1034, 5467.1034, 5467.1034, 5467.1034, 5467.1034] +25-08-28 11:54:11 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:54:11 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:54:11 | D | - sum error = [ 5737.7799, 5695.2910, 5666.9583, 5655.2155, 5632.6243] +25-08-28 11:54:11 | D | - best error = [ 5467.1034, 5467.1034, 5467.1034, 5467.1034, 5467.1034] +25-08-28 11:54:11 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:54:11 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:54:11 | D | - sum error = [ 5629.1625, 5619.9972, 5607.7942, 5620.1423, 5632.0039] +25-08-28 11:54:11 | D | - best error = [ 5467.1034, 5467.1034, 5467.1034, 5467.1034, 5467.1034] +25-08-28 11:54:11 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:54:11 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:54:11 | D | - sum error = [ 5645.3028, 5663.5065, 5698.3394, 5734.8405] +25-08-28 11:54:11 | D | - best error = [ 5467.1034, 5467.1034, 5467.1034, 5467.1034] +25-08-28 11:54:11 | D | + error = 5467.1034 +25-08-28 11:54:11 | D | + scale = [min=0.9392, max=1.8891] +25-08-28 11:54:11 | D | - single_transformer_blocks.20.down_proj +25-08-28 11:54:11 | D | + w: sint4 +25-08-28 11:54:11 | D | + x: uint4 +25-08-28 11:54:11 | D | + y: None +25-08-28 11:54:11 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:54:11 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 11:54:14 | D | + x - AbsMax +25-08-28 11:54:14 | D | + x = [min=0.8633, max=11.9375] +25-08-28 11:54:14 | D | + w - AbsMax +25-08-28 11:54:14 | D | + w = [min=0.1113, max=1.1484] +25-08-28 11:54:14 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 11:54:16 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 11:56:08 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:56:08 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:56:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:56:08 | D | - sum error = [ 7466.3769, 7456.1856, 7510.1883, 7523.0996, 7543.9928] +25-08-28 11:56:08 | D | - best error = [ 7466.3769, 7456.1856, 7456.1856, 7456.1856, 7456.1856] +25-08-28 11:56:08 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:56:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:56:08 | D | - sum error = [ 7593.9531, 7624.8242, 7666.1991, 7754.6394, 7871.9057] +25-08-28 11:56:08 | D | - best error = [ 7456.1856, 7456.1856, 7456.1856, 7456.1856, 7456.1856] +25-08-28 11:56:08 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:56:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:56:08 | D | - sum error = [ 7974.4527, 8056.5124, 8133.1647, 8210.1873, 8296.7994] +25-08-28 11:56:08 | D | - best error = [ 7456.1856, 7456.1856, 7456.1856, 7456.1856, 7456.1856] +25-08-28 11:56:08 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:56:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:56:08 | D | - sum error = [ 8432.4222, 8559.8818, 8744.2434, 8893.4421, 9019.9353] +25-08-28 11:56:08 | D | - best error = [ 7456.1856, 7456.1856, 7456.1856, 7456.1856, 7456.1856] +25-08-28 11:56:08 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:56:08 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:56:08 | D | - sum error = [ 8570.9379, 8406.8139, 8241.7294, 8137.3863, 8013.8563] +25-08-28 11:56:08 | D | - best error = [ 7456.1856, 7456.1856, 7456.1856, 7456.1856, 7456.1856] +25-08-28 11:56:08 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:56:08 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:56:08 | D | - sum error = [ 7948.8361, 7912.0286, 7876.5063, 7873.0218, 7885.1016] +25-08-28 11:56:08 | D | - best error = [ 7456.1856, 7456.1856, 7456.1856, 7456.1856, 7456.1856] +25-08-28 11:56:08 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:56:08 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:56:08 | D | - sum error = [ 7928.5832, 7951.3028, 8048.3081, 8146.9906, 8252.9151] +25-08-28 11:56:08 | D | - best error = [ 7456.1856, 7456.1856, 7456.1856, 7456.1856, 7456.1856] +25-08-28 11:56:08 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:56:08 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:56:08 | D | - sum error = [ 8419.4803, 8573.6121, 8750.6096, 8980.3791] +25-08-28 11:56:08 | D | - best error = [ 7456.1856, 7456.1856, 7456.1856, 7456.1856] +25-08-28 11:56:08 | D | + error = 7456.1856 +25-08-28 11:56:08 | D | + scale = [min=0.9927, max=1.1320] +25-08-28 11:56:26 | D | - Smoothing Diffusion Block single_transformer_blocks.21 +25-08-28 11:56:26 | D | - Skipping Module single_transformer_blocks.21.norm.linear +25-08-28 11:56:26 | D | - Smoothing Transformer Block single_transformer_blocks.21 +25-08-28 11:56:26 | D | - single_transformer_blocks.21.attn.qkv_proj + single_transformer_blocks.21.up_proj +25-08-28 11:56:26 | D | + w: sint4 +25-08-28 11:56:26 | D | + x: sint4 +25-08-28 11:56:26 | D | + y: None +25-08-28 11:56:26 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:56:26 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 11:56:27 | D | + x - AbsMax +25-08-28 11:56:27 | D | + x = [min=0.1611, max=23.1250] +25-08-28 11:56:27 | D | + w - AbsMax +25-08-28 11:56:27 | D | + w = [min=0.0654, max=0.9141] +25-08-28 11:56:27 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 11:56:28 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 11:58:34 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:58:34 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:58:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:58:34 | D | - sum error = [75435.4238, 72596.5050, 70348.7741, 68208.6061, 66762.2312] +25-08-28 11:58:34 | D | - best error = [75435.4238, 72596.5050, 70348.7741, 68208.6061, 66762.2312] +25-08-28 11:58:34 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:58:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:58:34 | D | - sum error = [66254.2941, 64137.6460, 62715.7917, 61675.3824, 61209.4801] +25-08-28 11:58:34 | D | - best error = [66254.2941, 64137.6460, 62715.7917, 61675.3824, 61209.4801] +25-08-28 11:58:34 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:58:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:58:34 | D | - sum error = [59901.3461, 59099.8201, 58636.7388, 58365.2477, 58160.1651] +25-08-28 11:58:34 | D | - best error = [59901.3461, 59099.8201, 58636.7388, 58365.2477, 58160.1651] +25-08-28 11:58:34 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:58:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:58:34 | D | - sum error = [57852.1968, 57763.9157, 57830.8618, 57815.9197, 58055.5938] +25-08-28 11:58:34 | D | - best error = [57852.1968, 57763.9157, 57763.9157, 57763.9157, 57763.9157] +25-08-28 11:58:34 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:58:34 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:58:34 | D | - sum error = [96699.0601, 91491.8334, 88463.7642, 83529.3543, 79994.7774] +25-08-28 11:58:34 | D | - best error = [57763.9157, 57763.9157, 57763.9157, 57763.9157, 57763.9157] +25-08-28 11:58:34 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:58:34 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:58:34 | D | - sum error = [76637.6618, 73072.8846, 70680.2784, 69053.1529, 65744.2667] +25-08-28 11:58:34 | D | - best error = [57763.9157, 57763.9157, 57763.9157, 57763.9157, 57763.9157] +25-08-28 11:58:34 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:58:34 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:58:34 | D | - sum error = [63650.1096, 62111.5219, 60884.5097, 59716.6000, 58950.8856] +25-08-28 11:58:34 | D | - best error = [57763.9157, 57763.9157, 57763.9157, 57763.9157, 57763.9157] +25-08-28 11:58:34 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:58:34 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:58:34 | D | - sum error = [58538.4791, 58254.6619, 58099.9062, 57925.0495] +25-08-28 11:58:34 | D | - best error = [57763.9157, 57763.9157, 57763.9157, 57763.9157] +25-08-28 11:58:34 | D | + error = 57763.9157 +25-08-28 11:58:34 | D | + scale = [min=0.2321, max=12.3386] +25-08-28 11:58:34 | D | - single_transformer_blocks.21.attn.out_proj +25-08-28 11:58:34 | D | + w: sint4 +25-08-28 11:58:34 | D | + x: sint4 +25-08-28 11:58:34 | D | + y: None +25-08-28 11:58:34 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:58:34 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 11:58:35 | D | + x - AbsMax +25-08-28 11:58:35 | D | + x = [min=0.9727, max=6.5000] +25-08-28 11:58:35 | D | + w - AbsMax +25-08-28 11:58:35 | D | + w = [min=0.1118, max=0.4004] +25-08-28 11:58:35 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 11:58:36 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 11:59:32 | D | - x / w range = AbsMax / AbsMax +25-08-28 11:59:32 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 11:59:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:59:32 | D | - sum error = [ 5139.3817, 5129.7992, 5116.4662, 5110.7068, 5092.5614] +25-08-28 11:59:32 | D | - best error = [ 5139.3817, 5129.7992, 5116.4662, 5110.7068, 5092.5614] +25-08-28 11:59:32 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 11:59:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:59:32 | D | - sum error = [ 5094.5494, 5093.3675, 5098.5108, 5090.0841, 5094.0075] +25-08-28 11:59:32 | D | - best error = [ 5092.5614, 5092.5614, 5092.5614, 5090.0841, 5090.0841] +25-08-28 11:59:32 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 11:59:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:59:32 | D | - sum error = [ 5105.6573, 5110.3495, 5114.8374, 5130.6602, 5151.8257] +25-08-28 11:59:32 | D | - best error = [ 5090.0841, 5090.0841, 5090.0841, 5090.0841, 5090.0841] +25-08-28 11:59:32 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:59:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 11:59:32 | D | - sum error = [ 5156.2166, 5187.3992, 5207.0333, 5255.0777, 5280.7159] +25-08-28 11:59:32 | D | - best error = [ 5090.0841, 5090.0841, 5090.0841, 5090.0841, 5090.0841] +25-08-28 11:59:32 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 11:59:32 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 11:59:32 | D | - sum error = [ 5581.8266, 5518.6475, 5468.4217, 5420.5966, 5375.5426] +25-08-28 11:59:32 | D | - best error = [ 5090.0841, 5090.0841, 5090.0841, 5090.0841, 5090.0841] +25-08-28 11:59:32 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 11:59:32 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 11:59:32 | D | - sum error = [ 5324.8632, 5298.8750, 5273.4292, 5249.5129, 5232.1797] +25-08-28 11:59:32 | D | - best error = [ 5090.0841, 5090.0841, 5090.0841, 5090.0841, 5090.0841] +25-08-28 11:59:32 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 11:59:32 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 11:59:32 | D | - sum error = [ 5204.0630, 5197.1908, 5189.9459, 5198.7395, 5189.7996] +25-08-28 11:59:32 | D | - best error = [ 5090.0841, 5090.0841, 5090.0841, 5090.0841, 5090.0841] +25-08-28 11:59:32 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 11:59:32 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 11:59:32 | D | - sum error = [ 5194.4898, 5219.4837, 5254.7668, 5280.5727] +25-08-28 11:59:32 | D | - best error = [ 5090.0841, 5090.0841, 5090.0841, 5090.0841] +25-08-28 11:59:32 | D | + error = 5090.0841 +25-08-28 11:59:32 | D | + scale = [min=0.9890, max=2.1143] +25-08-28 11:59:32 | D | - single_transformer_blocks.21.down_proj +25-08-28 11:59:32 | D | + w: sint4 +25-08-28 11:59:32 | D | + x: uint4 +25-08-28 11:59:32 | D | + y: None +25-08-28 11:59:32 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 11:59:32 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 11:59:34 | D | + x - AbsMax +25-08-28 11:59:34 | D | + x = [min=0.9531, max=15.0625] +25-08-28 11:59:34 | D | + w - AbsMax +25-08-28 11:59:34 | D | + w = [min=0.1074, max=1.4141] +25-08-28 11:59:34 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 11:59:36 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:01:26 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:01:26 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:01:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:01:26 | D | - sum error = [ 7098.7066, 7089.9171, 7105.3056, 7123.4368, 7140.3874] +25-08-28 12:01:26 | D | - best error = [ 7098.7066, 7089.9171, 7089.9171, 7089.9171, 7089.9171] +25-08-28 12:01:26 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:01:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:01:26 | D | - sum error = [ 7155.8753, 7184.9727, 7238.5203, 7277.7876, 7334.2760] +25-08-28 12:01:26 | D | - best error = [ 7089.9171, 7089.9171, 7089.9171, 7089.9171, 7089.9171] +25-08-28 12:01:26 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:01:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:01:26 | D | - sum error = [ 7423.0253, 7501.1578, 7579.9694, 7730.4799, 7794.8024] +25-08-28 12:01:26 | D | - best error = [ 7089.9171, 7089.9171, 7089.9171, 7089.9171, 7089.9171] +25-08-28 12:01:26 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:01:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:01:26 | D | - sum error = [ 7908.0496, 8025.2093, 8134.7059, 8255.8556, 8394.2290] +25-08-28 12:01:26 | D | - best error = [ 7089.9171, 7089.9171, 7089.9171, 7089.9171, 7089.9171] +25-08-28 12:01:26 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:01:26 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:01:26 | D | - sum error = [ 8082.0634, 7935.8977, 7812.6055, 7698.9152, 7613.9976] +25-08-28 12:01:26 | D | - best error = [ 7089.9171, 7089.9171, 7089.9171, 7089.9171, 7089.9171] +25-08-28 12:01:26 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:01:26 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:01:26 | D | - sum error = [ 7516.2358, 7456.0013, 7433.3556, 7432.7828, 7433.6309] +25-08-28 12:01:26 | D | - best error = [ 7089.9171, 7089.9171, 7089.9171, 7089.9171, 7089.9171] +25-08-28 12:01:26 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:01:26 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:01:26 | D | - sum error = [ 7472.8046, 7498.2171, 7553.0060, 7630.2537, 7726.8501] +25-08-28 12:01:26 | D | - best error = [ 7089.9171, 7089.9171, 7089.9171, 7089.9171, 7089.9171] +25-08-28 12:01:26 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:01:26 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:01:26 | D | - sum error = [ 7844.1594, 7955.1204, 8177.3849, 8354.5178] +25-08-28 12:01:26 | D | - best error = [ 7089.9171, 7089.9171, 7089.9171, 7089.9171] +25-08-28 12:01:26 | D | + error = 7089.9171 +25-08-28 12:01:26 | D | + scale = [min=0.9976, max=1.1452] +25-08-28 12:01:44 | D | - Smoothing Diffusion Block single_transformer_blocks.22 +25-08-28 12:01:44 | D | - Skipping Module single_transformer_blocks.22.norm.linear +25-08-28 12:01:44 | D | - Smoothing Transformer Block single_transformer_blocks.22 +25-08-28 12:01:44 | D | - single_transformer_blocks.22.attn.qkv_proj + single_transformer_blocks.22.up_proj +25-08-28 12:01:44 | D | + w: sint4 +25-08-28 12:01:44 | D | + x: sint4 +25-08-28 12:01:44 | D | + y: None +25-08-28 12:01:44 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:01:44 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:01:45 | D | + x - AbsMax +25-08-28 12:01:45 | D | + x = [min=0.1641, max=16.0000] +25-08-28 12:01:45 | D | + w - AbsMax +25-08-28 12:01:45 | D | + w = [min=0.0796, max=0.7031] +25-08-28 12:01:45 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:01:46 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:03:52 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:03:52 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:03:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:03:52 | D | - sum error = [80073.9434, 77826.5164, 76105.8833, 74377.0872, 72920.0943] +25-08-28 12:03:52 | D | - best error = [80073.9434, 77826.5164, 76105.8833, 74377.0872, 72920.0943] +25-08-28 12:03:52 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:03:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:03:52 | D | - sum error = [71410.2460, 70169.4844, 68935.5316, 68014.7259, 67000.0989] +25-08-28 12:03:52 | D | - best error = [71410.2460, 70169.4844, 68935.5316, 68014.7259, 67000.0989] +25-08-28 12:03:52 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:03:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:03:52 | D | - sum error = [66410.1698, 65885.2569, 65336.3855, 64778.0229, 64812.2221] +25-08-28 12:03:52 | D | - best error = [66410.1698, 65885.2569, 65336.3855, 64778.0229, 64778.0229] +25-08-28 12:03:52 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:03:52 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:03:52 | D | - sum error = [64417.0810, 64361.7019, 64307.0833, 64359.2000, 64596.6587] +25-08-28 12:03:52 | D | - best error = [64417.0810, 64361.7019, 64307.0833, 64307.0833, 64307.0833] +25-08-28 12:03:52 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:03:52 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:03:52 | D | - sum error = [118286.2026, 112524.0176, 107032.4757, 101710.5842, 96573.6567] +25-08-28 12:03:52 | D | - best error = [64307.0833, 64307.0833, 64307.0833, 64307.0833, 64307.0833] +25-08-28 12:03:52 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:03:52 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:03:52 | D | - sum error = [92078.0191, 87672.5369, 83672.6421, 79246.3864, 76178.2600] +25-08-28 12:03:52 | D | - best error = [64307.0833, 64307.0833, 64307.0833, 64307.0833, 64307.0833] +25-08-28 12:03:52 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:03:52 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:03:52 | D | - sum error = [73741.3911, 71344.7876, 69487.8471, 68044.5670, 66815.6475] +25-08-28 12:03:52 | D | - best error = [64307.0833, 64307.0833, 64307.0833, 64307.0833, 64307.0833] +25-08-28 12:03:52 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:03:52 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:03:52 | D | - sum error = [65866.8912, 65218.1001, 64837.2555, 64626.5966] +25-08-28 12:03:52 | D | - best error = [64307.0833, 64307.0833, 64307.0833, 64307.0833] +25-08-28 12:03:52 | D | + error = 64307.0833 +25-08-28 12:03:52 | D | + scale = [min=0.2152, max=10.5561] +25-08-28 12:03:52 | D | - single_transformer_blocks.22.attn.out_proj +25-08-28 12:03:52 | D | + w: sint4 +25-08-28 12:03:52 | D | + x: sint4 +25-08-28 12:03:52 | D | + y: None +25-08-28 12:03:52 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:03:52 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:03:53 | D | + x - AbsMax +25-08-28 12:03:53 | D | + x = [min=1.2656, max=7.1250] +25-08-28 12:03:53 | D | + w - AbsMax +25-08-28 12:03:53 | D | + w = [min=0.1211, max=0.4121] +25-08-28 12:03:53 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:03:54 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:04:51 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:04:51 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:04:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:04:51 | D | - sum error = [ 5430.7753, 5422.3858, 5418.5791, 5413.3134, 5416.8546] +25-08-28 12:04:51 | D | - best error = [ 5430.7753, 5422.3858, 5418.5791, 5413.3134, 5413.3134] +25-08-28 12:04:51 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:04:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:04:51 | D | - sum error = [ 5403.7953, 5401.4402, 5394.5307, 5406.0919, 5401.5540] +25-08-28 12:04:51 | D | - best error = [ 5403.7953, 5401.4402, 5394.5307, 5394.5307, 5394.5307] +25-08-28 12:04:51 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:04:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:04:51 | D | - sum error = [ 5405.4717, 5402.6394, 5406.6315, 5414.7430, 5428.6846] +25-08-28 12:04:51 | D | - best error = [ 5394.5307, 5394.5307, 5394.5307, 5394.5307, 5394.5307] +25-08-28 12:04:51 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:04:51 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:04:51 | D | - sum error = [ 5437.6074, 5440.2165, 5463.1634, 5471.8556, 5501.6439] +25-08-28 12:04:51 | D | - best error = [ 5394.5307, 5394.5307, 5394.5307, 5394.5307, 5394.5307] +25-08-28 12:04:51 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:04:51 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:04:51 | D | - sum error = [ 5806.4695, 5758.4269, 5714.5911, 5676.7500, 5641.7079] +25-08-28 12:04:51 | D | - best error = [ 5394.5307, 5394.5307, 5394.5307, 5394.5307, 5394.5307] +25-08-28 12:04:51 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:04:51 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:04:51 | D | - sum error = [ 5617.1014, 5590.2258, 5583.1093, 5556.1093, 5539.6361] +25-08-28 12:04:51 | D | - best error = [ 5394.5307, 5394.5307, 5394.5307, 5394.5307, 5394.5307] +25-08-28 12:04:51 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:04:51 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:04:51 | D | - sum error = [ 5522.9398, 5499.6791, 5480.8195, 5480.3788, 5479.3730] +25-08-28 12:04:51 | D | - best error = [ 5394.5307, 5394.5307, 5394.5307, 5394.5307, 5394.5307] +25-08-28 12:04:51 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:04:51 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:04:51 | D | - sum error = [ 5475.5051, 5473.9636, 5487.0132, 5501.7252] +25-08-28 12:04:51 | D | - best error = [ 5394.5307, 5394.5307, 5394.5307, 5394.5307] +25-08-28 12:04:51 | D | + error = 5394.5307 +25-08-28 12:04:51 | D | + scale = [min=1.0859, max=1.9883] +25-08-28 12:04:51 | D | - single_transformer_blocks.22.down_proj +25-08-28 12:04:51 | D | + w: sint4 +25-08-28 12:04:51 | D | + x: uint4 +25-08-28 12:04:51 | D | + y: None +25-08-28 12:04:51 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:04:51 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:04:54 | D | + x - AbsMax +25-08-28 12:04:54 | D | + x = [min=0.8594, max=10.3750] +25-08-28 12:04:54 | D | + w - AbsMax +25-08-28 12:04:54 | D | + w = [min=0.1118, max=0.8711] +25-08-28 12:04:54 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:04:56 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:07:06 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:07:06 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:07:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:07:06 | D | - sum error = [ 7100.9154, 7118.3164, 7114.3992, 7163.8138, 7191.3406] +25-08-28 12:07:06 | D | - best error = [ 7100.9154, 7100.9154, 7100.9154, 7100.9154, 7100.9154] +25-08-28 12:07:06 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:07:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:07:06 | D | - sum error = [ 7220.6266, 7269.4979, 7322.4734, 7413.9439, 7480.4470] +25-08-28 12:07:06 | D | - best error = [ 7100.9154, 7100.9154, 7100.9154, 7100.9154, 7100.9154] +25-08-28 12:07:06 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:07:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:07:06 | D | - sum error = [ 7552.8554, 7660.0089, 7749.9283, 7814.9958, 7957.1394] +25-08-28 12:07:06 | D | - best error = [ 7100.9154, 7100.9154, 7100.9154, 7100.9154, 7100.9154] +25-08-28 12:07:06 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:07:06 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:07:06 | D | - sum error = [ 8067.5692, 8237.8182, 8370.2507, 8548.1778, 8735.6950] +25-08-28 12:07:06 | D | - best error = [ 7100.9154, 7100.9154, 7100.9154, 7100.9154, 7100.9154] +25-08-28 12:07:06 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:07:06 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:07:06 | D | - sum error = [ 8106.2107, 7958.7049, 7839.1153, 7716.7055, 7655.2897] +25-08-28 12:07:06 | D | - best error = [ 7100.9154, 7100.9154, 7100.9154, 7100.9154, 7100.9154] +25-08-28 12:07:06 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:07:06 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:07:06 | D | - sum error = [ 7627.6455, 7549.3737, 7515.6775, 7520.4622, 7555.7994] +25-08-28 12:07:06 | D | - best error = [ 7100.9154, 7100.9154, 7100.9154, 7100.9154, 7100.9154] +25-08-28 12:07:06 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:07:06 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:07:06 | D | - sum error = [ 7639.9283, 7709.9917, 7746.0044, 7860.8727, 7963.6434] +25-08-28 12:07:06 | D | - best error = [ 7100.9154, 7100.9154, 7100.9154, 7100.9154, 7100.9154] +25-08-28 12:07:06 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:07:06 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:07:06 | D | - sum error = [ 8072.1658, 8260.3506, 8458.6051, 8658.0395] +25-08-28 12:07:06 | D | - best error = [ 7100.9154, 7100.9154, 7100.9154, 7100.9154] +25-08-28 12:07:06 | D | + error = 7100.9154 +25-08-28 12:07:06 | D | + scale = [min=1.0000, max=1.0000] +25-08-28 12:07:24 | D | - Smoothing Diffusion Block single_transformer_blocks.23 +25-08-28 12:07:24 | D | - Skipping Module single_transformer_blocks.23.norm.linear +25-08-28 12:07:24 | D | - Smoothing Transformer Block single_transformer_blocks.23 +25-08-28 12:07:24 | D | - single_transformer_blocks.23.attn.qkv_proj + single_transformer_blocks.23.up_proj +25-08-28 12:07:24 | D | + w: sint4 +25-08-28 12:07:24 | D | + x: sint4 +25-08-28 12:07:24 | D | + y: None +25-08-28 12:07:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:07:24 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:07:24 | D | + x - AbsMax +25-08-28 12:07:24 | D | + x = [min=0.1836, max=22.7500] +25-08-28 12:07:24 | D | + w - AbsMax +25-08-28 12:07:24 | D | + w = [min=0.0913, max=0.7578] +25-08-28 12:07:24 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:07:26 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:09:31 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:09:31 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:09:31 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:09:31 | D | - sum error = [90312.2718, 87434.4269, 84703.6192, 81899.1724, 79877.1345] +25-08-28 12:09:31 | D | - best error = [90312.2718, 87434.4269, 84703.6192, 81899.1724, 79877.1345] +25-08-28 12:09:31 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:09:31 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:09:31 | D | - sum error = [77435.1536, 75024.7046, 73433.9379, 71866.1796, 70428.0526] +25-08-28 12:09:31 | D | - best error = [77435.1536, 75024.7046, 73433.9379, 71866.1796, 70428.0526] +25-08-28 12:09:31 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:09:31 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:09:31 | D | - sum error = [69251.7374, 68618.4209, 67943.9204, 67337.7369, 66878.5914] +25-08-28 12:09:31 | D | - best error = [69251.7374, 68618.4209, 67943.9204, 67337.7369, 66878.5914] +25-08-28 12:09:31 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:09:31 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:09:31 | D | - sum error = [66471.4565, 65327.4377, 65111.3193, 65182.2818, 65467.0106] +25-08-28 12:09:31 | D | - best error = [66471.4565, 65327.4377, 65111.3193, 65111.3193, 65111.3193] +25-08-28 12:09:31 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:09:31 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:09:31 | D | - sum error = [144203.2912, 135600.2558, 123748.5885, 116916.2731, 111703.4291] +25-08-28 12:09:31 | D | - best error = [65111.3193, 65111.3193, 65111.3193, 65111.3193, 65111.3193] +25-08-28 12:09:31 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:09:31 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:09:31 | D | - sum error = [104434.1216, 98534.2304, 92509.7585, 87474.9661, 82895.1121] +25-08-28 12:09:31 | D | - best error = [65111.3193, 65111.3193, 65111.3193, 65111.3193, 65111.3193] +25-08-28 12:09:31 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:09:31 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:09:31 | D | - sum error = [79189.7496, 76591.8245, 73020.3820, 70765.1150, 69245.3197] +25-08-28 12:09:31 | D | - best error = [65111.3193, 65111.3193, 65111.3193, 65111.3193, 65111.3193] +25-08-28 12:09:31 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:09:31 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:09:31 | D | - sum error = [67920.8349, 66551.4187, 65898.0080, 65469.7712] +25-08-28 12:09:31 | D | - best error = [65111.3193, 65111.3193, 65111.3193, 65111.3193] +25-08-28 12:09:31 | D | + error = 65111.3193 +25-08-28 12:09:31 | D | + scale = [min=0.2367, max=14.2375] +25-08-28 12:09:31 | D | - single_transformer_blocks.23.attn.out_proj +25-08-28 12:09:31 | D | + w: sint4 +25-08-28 12:09:31 | D | + x: sint4 +25-08-28 12:09:31 | D | + y: None +25-08-28 12:09:31 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:09:31 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:09:32 | D | + x - AbsMax +25-08-28 12:09:32 | D | + x = [min=0.9180, max=6.8750] +25-08-28 12:09:32 | D | + w - AbsMax +25-08-28 12:09:32 | D | + w = [min=0.1191, max=0.4102] +25-08-28 12:09:32 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:09:33 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:10:32 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:10:32 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:10:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:10:32 | D | - sum error = [ 5456.7959, 5429.3243, 5419.3819, 5412.3463, 5401.1854] +25-08-28 12:10:32 | D | - best error = [ 5456.7959, 5429.3243, 5419.3819, 5412.3463, 5401.1854] +25-08-28 12:10:32 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:10:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:10:32 | D | - sum error = [ 5384.7007, 5384.3098, 5384.2348, 5385.4281, 5391.1528] +25-08-28 12:10:32 | D | - best error = [ 5384.7007, 5384.3098, 5384.2348, 5384.2348, 5384.2348] +25-08-28 12:10:32 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:10:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:10:32 | D | - sum error = [ 5395.0196, 5421.5594, 5424.7330, 5440.3094, 5476.1167] +25-08-28 12:10:32 | D | - best error = [ 5384.2348, 5384.2348, 5384.2348, 5384.2348, 5384.2348] +25-08-28 12:10:32 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:10:32 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:10:32 | D | - sum error = [ 5506.7326, 5548.9352, 5580.0656, 5612.4430, 5654.1219] +25-08-28 12:10:32 | D | - best error = [ 5384.2348, 5384.2348, 5384.2348, 5384.2348, 5384.2348] +25-08-28 12:10:32 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:10:32 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:10:32 | D | - sum error = [ 5974.5634, 5900.4142, 5831.3079, 5767.1969, 5719.3185] +25-08-28 12:10:32 | D | - best error = [ 5384.2348, 5384.2348, 5384.2348, 5384.2348, 5384.2348] +25-08-28 12:10:32 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:10:32 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:10:32 | D | - sum error = [ 5674.1343, 5628.1466, 5591.0629, 5571.2040, 5550.5301] +25-08-28 12:10:32 | D | - best error = [ 5384.2348, 5384.2348, 5384.2348, 5384.2348, 5384.2348] +25-08-28 12:10:32 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:10:32 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:10:32 | D | - sum error = [ 5540.0565, 5526.2828, 5533.4378, 5538.9048, 5544.6354] +25-08-28 12:10:32 | D | - best error = [ 5384.2348, 5384.2348, 5384.2348, 5384.2348, 5384.2348] +25-08-28 12:10:32 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:10:32 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:10:32 | D | - sum error = [ 5565.4098, 5588.7729, 5620.4359, 5652.3790] +25-08-28 12:10:32 | D | - best error = [ 5384.2348, 5384.2348, 5384.2348, 5384.2348] +25-08-28 12:10:32 | D | + error = 5384.2348 +25-08-28 12:10:32 | D | + scale = [min=0.9705, max=1.9636] +25-08-28 12:10:32 | D | - single_transformer_blocks.23.down_proj +25-08-28 12:10:32 | D | + w: sint4 +25-08-28 12:10:32 | D | + x: uint4 +25-08-28 12:10:32 | D | + y: None +25-08-28 12:10:32 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:10:32 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:10:36 | D | + x - AbsMax +25-08-28 12:10:36 | D | + x = [min=0.8047, max=11.3750] +25-08-28 12:10:36 | D | + w - AbsMax +25-08-28 12:10:36 | D | + w = [min=0.1074, max=1.0547] +25-08-28 12:10:36 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:10:38 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:12:37 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:12:37 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:12:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:12:37 | D | - sum error = [ 6600.0782, 6581.4114, 6607.8935, 6636.5560, 6658.7828] +25-08-28 12:12:37 | D | - best error = [ 6600.0782, 6581.4114, 6581.4114, 6581.4114, 6581.4114] +25-08-28 12:12:37 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:12:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:12:37 | D | - sum error = [ 6689.3602, 6707.4305, 6751.2491, 6803.0323, 6879.4884] +25-08-28 12:12:37 | D | - best error = [ 6581.4114, 6581.4114, 6581.4114, 6581.4114, 6581.4114] +25-08-28 12:12:37 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:12:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:12:37 | D | - sum error = [ 6985.0065, 7076.0616, 7148.2613, 7222.0523, 7291.1620] +25-08-28 12:12:37 | D | - best error = [ 6581.4114, 6581.4114, 6581.4114, 6581.4114, 6581.4114] +25-08-28 12:12:37 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:12:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:12:37 | D | - sum error = [ 7416.7243, 7583.7136, 7691.9490, 7842.3848, 7977.5874] +25-08-28 12:12:37 | D | - best error = [ 6581.4114, 6581.4114, 6581.4114, 6581.4114, 6581.4114] +25-08-28 12:12:37 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:12:37 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:12:37 | D | - sum error = [ 7358.4436, 7239.3942, 7109.0604, 7015.3170, 6953.3054] +25-08-28 12:12:37 | D | - best error = [ 6581.4114, 6581.4114, 6581.4114, 6581.4114, 6581.4114] +25-08-28 12:12:37 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:12:37 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:12:37 | D | - sum error = [ 6888.0360, 6856.6359, 6871.7401, 6861.8240, 6900.2310] +25-08-28 12:12:37 | D | - best error = [ 6581.4114, 6581.4114, 6581.4114, 6581.4114, 6581.4114] +25-08-28 12:12:37 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:12:37 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:12:37 | D | - sum error = [ 6927.1293, 7000.4257, 7050.6005, 7147.3760, 7306.5015] +25-08-28 12:12:37 | D | - best error = [ 6581.4114, 6581.4114, 6581.4114, 6581.4114, 6581.4114] +25-08-28 12:12:37 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:12:37 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:12:37 | D | - sum error = [ 7449.0585, 7557.9458, 7740.6687, 7914.0988] +25-08-28 12:12:37 | D | - best error = [ 6581.4114, 6581.4114, 6581.4114, 6581.4114] +25-08-28 12:12:37 | D | + error = 6581.4114 +25-08-28 12:12:37 | D | + scale = [min=0.9892, max=1.1293] +25-08-28 12:12:55 | D | - Smoothing Diffusion Block single_transformer_blocks.24 +25-08-28 12:12:55 | D | - Skipping Module single_transformer_blocks.24.norm.linear +25-08-28 12:12:55 | D | - Smoothing Transformer Block single_transformer_blocks.24 +25-08-28 12:12:55 | D | - single_transformer_blocks.24.attn.qkv_proj + single_transformer_blocks.24.up_proj +25-08-28 12:12:55 | D | + w: sint4 +25-08-28 12:12:55 | D | + x: sint4 +25-08-28 12:12:55 | D | + y: None +25-08-28 12:12:55 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:12:55 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:12:56 | D | + x - AbsMax +25-08-28 12:12:56 | D | + x = [min=0.0981, max=19.1250] +25-08-28 12:12:56 | D | + w - AbsMax +25-08-28 12:12:56 | D | + w = [min=0.0781, max=1.0938] +25-08-28 12:12:56 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:12:57 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:15:03 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:15:03 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:15:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:15:03 | D | - sum error = [94956.3592, 91333.2696, 88080.5421, 85753.0355, 82784.7607] +25-08-28 12:15:03 | D | - best error = [94956.3592, 91333.2696, 88080.5421, 85753.0355, 82784.7607] +25-08-28 12:15:03 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:15:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:15:03 | D | - sum error = [79825.5025, 77930.6670, 75739.4743, 74104.3347, 72669.4143] +25-08-28 12:15:03 | D | - best error = [79825.5025, 77930.6670, 75739.4743, 74104.3347, 72669.4143] +25-08-28 12:15:03 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:15:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:15:03 | D | - sum error = [71442.2987, 70211.7775, 69380.6714, 68604.0148, 67843.7923] +25-08-28 12:15:03 | D | - best error = [71442.2987, 70211.7775, 69380.6714, 68604.0148, 67843.7923] +25-08-28 12:15:03 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:15:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:15:03 | D | - sum error = [67388.6191, 67351.7525, 66997.2026, 66793.0650, 66986.9690] +25-08-28 12:15:03 | D | - best error = [67388.6191, 67351.7525, 66997.2026, 66793.0650, 66793.0650] +25-08-28 12:15:03 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:15:03 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:15:03 | D | - sum error = [125763.6966, 119922.8168, 113821.1616, 107670.3901, 101990.5414] +25-08-28 12:15:03 | D | - best error = [66793.0650, 66793.0650, 66793.0650, 66793.0650, 66793.0650] +25-08-28 12:15:03 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:15:03 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:15:03 | D | - sum error = [96888.7184, 92140.4313, 87706.3172, 84015.9771, 80569.9474] +25-08-28 12:15:03 | D | - best error = [66793.0650, 66793.0650, 66793.0650, 66793.0650, 66793.0650] +25-08-28 12:15:03 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:15:03 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:15:03 | D | - sum error = [76920.1327, 74586.8930, 72518.4816, 70696.0249, 69400.5897] +25-08-28 12:15:03 | D | - best error = [66793.0650, 66793.0650, 66793.0650, 66793.0650, 66793.0650] +25-08-28 12:15:03 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:15:03 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:15:03 | D | - sum error = [68746.8111, 68037.2002, 67366.9621, 67102.3443] +25-08-28 12:15:03 | D | - best error = [66793.0650, 66793.0650, 66793.0650, 66793.0650] +25-08-28 12:15:03 | D | + error = 66793.0650 +25-08-28 12:15:03 | D | + scale = [min=0.1238, max=14.2377] +25-08-28 12:15:03 | D | - single_transformer_blocks.24.attn.out_proj +25-08-28 12:15:03 | D | + w: sint4 +25-08-28 12:15:03 | D | + x: sint4 +25-08-28 12:15:03 | D | + y: None +25-08-28 12:15:03 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:15:03 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:15:04 | D | + x - AbsMax +25-08-28 12:15:04 | D | + x = [min=0.8945, max=5.4375] +25-08-28 12:15:04 | D | + w - AbsMax +25-08-28 12:15:04 | D | + w = [min=0.1191, max=0.3711] +25-08-28 12:15:04 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:15:05 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:16:04 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:16:04 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:16:04 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:16:04 | D | - sum error = [ 5184.8202, 5169.8015, 5149.3889, 5126.5433, 5110.6647] +25-08-28 12:16:04 | D | - best error = [ 5184.8202, 5169.8015, 5149.3889, 5126.5433, 5110.6647] +25-08-28 12:16:04 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:16:04 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:16:04 | D | - sum error = [ 5110.4828, 5097.8533, 5094.7045, 5098.2040, 5109.4189] +25-08-28 12:16:04 | D | - best error = [ 5110.4828, 5097.8533, 5094.7045, 5094.7045, 5094.7045] +25-08-28 12:16:04 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:16:04 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:16:04 | D | - sum error = [ 5104.5035, 5100.0900, 5122.6218, 5145.8677, 5168.1235] +25-08-28 12:16:04 | D | - best error = [ 5094.7045, 5094.7045, 5094.7045, 5094.7045, 5094.7045] +25-08-28 12:16:04 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:16:04 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:16:04 | D | - sum error = [ 5179.4424, 5200.4682, 5243.0781, 5271.8579, 5308.7283] +25-08-28 12:16:04 | D | - best error = [ 5094.7045, 5094.7045, 5094.7045, 5094.7045, 5094.7045] +25-08-28 12:16:04 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:16:04 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:16:04 | D | - sum error = [ 5584.3141, 5530.6367, 5477.8662, 5426.6763, 5377.7003] +25-08-28 12:16:04 | D | - best error = [ 5094.7045, 5094.7045, 5094.7045, 5094.7045, 5094.7045] +25-08-28 12:16:04 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:16:04 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:16:04 | D | - sum error = [ 5332.7513, 5300.1528, 5268.2008, 5249.0334, 5221.5942] +25-08-28 12:16:04 | D | - best error = [ 5094.7045, 5094.7045, 5094.7045, 5094.7045, 5094.7045] +25-08-28 12:16:04 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:16:04 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:16:04 | D | - sum error = [ 5208.7021, 5202.1442, 5203.7750, 5203.7287, 5212.0597] +25-08-28 12:16:04 | D | - best error = [ 5094.7045, 5094.7045, 5094.7045, 5094.7045, 5094.7045] +25-08-28 12:16:04 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:16:04 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:16:04 | D | - sum error = [ 5233.1799, 5249.6259, 5287.7716, 5310.3503] +25-08-28 12:16:04 | D | - best error = [ 5094.7045, 5094.7045, 5094.7045, 5094.7045] +25-08-28 12:16:04 | D | + error = 5094.7045 +25-08-28 12:16:04 | D | + scale = [min=0.9617, max=1.8088] +25-08-28 12:16:04 | D | - single_transformer_blocks.24.down_proj +25-08-28 12:16:04 | D | + w: sint4 +25-08-28 12:16:05 | D | + x: uint4 +25-08-28 12:16:05 | D | + y: None +25-08-28 12:16:05 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:16:05 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:16:07 | D | + x - AbsMax +25-08-28 12:16:07 | D | + x = [min=1.3828, max=11.7500] +25-08-28 12:16:07 | D | + w - AbsMax +25-08-28 12:16:07 | D | + w = [min=0.1079, max=0.9570] +25-08-28 12:16:07 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:16:08 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:17:56 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:17:56 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:17:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:17:56 | D | - sum error = [ 6820.6988, 6832.2845, 6840.8459, 6858.4692, 6918.5539] +25-08-28 12:17:56 | D | - best error = [ 6820.6988, 6820.6988, 6820.6988, 6820.6988, 6820.6988] +25-08-28 12:17:56 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:17:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:17:56 | D | - sum error = [ 6940.5795, 6976.7067, 7000.9562, 7079.1841, 7130.1293] +25-08-28 12:17:56 | D | - best error = [ 6820.6988, 6820.6988, 6820.6988, 6820.6988, 6820.6988] +25-08-28 12:17:56 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:17:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:17:56 | D | - sum error = [ 7203.5641, 7253.5387, 7334.4759, 7432.4365, 7532.6325] +25-08-28 12:17:56 | D | - best error = [ 6820.6988, 6820.6988, 6820.6988, 6820.6988, 6820.6988] +25-08-28 12:17:56 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:17:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:17:56 | D | - sum error = [ 7622.7538, 7704.7995, 7774.2776, 7897.3254, 8045.7223] +25-08-28 12:17:56 | D | - best error = [ 6820.6988, 6820.6988, 6820.6988, 6820.6988, 6820.6988] +25-08-28 12:17:56 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:17:56 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:17:56 | D | - sum error = [ 7704.3499, 7534.9001, 7449.4773, 7360.7524, 7288.3891] +25-08-28 12:17:56 | D | - best error = [ 6820.6988, 6820.6988, 6820.6988, 6820.6988, 6820.6988] +25-08-28 12:17:56 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:17:56 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:17:56 | D | - sum error = [ 7234.0600, 7194.7631, 7172.8206, 7171.0261, 7176.4682] +25-08-28 12:17:56 | D | - best error = [ 6820.6988, 6820.6988, 6820.6988, 6820.6988, 6820.6988] +25-08-28 12:17:56 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:17:56 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:17:56 | D | - sum error = [ 7169.0172, 7203.5836, 7287.4489, 7363.9685, 7480.4855] +25-08-28 12:17:56 | D | - best error = [ 6820.6988, 6820.6988, 6820.6988, 6820.6988, 6820.6988] +25-08-28 12:17:56 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:17:56 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:17:56 | D | - sum error = [ 7582.3521, 7700.6467, 7806.2462, 8013.1138] +25-08-28 12:17:56 | D | - best error = [ 6820.6988, 6820.6988, 6820.6988, 6820.6988] +25-08-28 12:17:56 | D | + error = 6820.6988 +25-08-28 12:17:56 | D | + scale = [min=1.0000, max=1.0000] +25-08-28 12:18:15 | D | - Smoothing Diffusion Block single_transformer_blocks.25 +25-08-28 12:18:15 | D | - Skipping Module single_transformer_blocks.25.norm.linear +25-08-28 12:18:15 | D | - Smoothing Transformer Block single_transformer_blocks.25 +25-08-28 12:18:15 | D | - single_transformer_blocks.25.attn.qkv_proj + single_transformer_blocks.25.up_proj +25-08-28 12:18:15 | D | + w: sint4 +25-08-28 12:18:15 | D | + x: sint4 +25-08-28 12:18:15 | D | + y: None +25-08-28 12:18:15 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:18:15 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:18:15 | D | + x - AbsMax +25-08-28 12:18:15 | D | + x = [min=0.3125, max=18.5000] +25-08-28 12:18:15 | D | + w - AbsMax +25-08-28 12:18:15 | D | + w = [min=0.1206, max=1.1562] +25-08-28 12:18:15 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:18:17 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:20:26 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:20:26 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:20:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:20:26 | D | - sum error = [101483.9829, 98315.7389, 95359.7369, 92776.3115, 90342.6282] +25-08-28 12:20:26 | D | - best error = [101483.9829, 98315.7389, 95359.7369, 92776.3115, 90342.6282] +25-08-28 12:20:26 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:20:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:20:26 | D | - sum error = [88483.3880, 86820.2048, 84873.3238, 83504.1334, 82202.2295] +25-08-28 12:20:26 | D | - best error = [88483.3880, 86820.2048, 84873.3238, 83504.1334, 82202.2295] +25-08-28 12:20:26 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:20:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:20:26 | D | - sum error = [81638.6450, 80951.5031, 80124.0127, 79543.1210, 79161.9852] +25-08-28 12:20:26 | D | - best error = [81638.6450, 80951.5031, 80124.0127, 79543.1210, 79161.9852] +25-08-28 12:20:26 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:20:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:20:26 | D | - sum error = [79009.5391, 78709.7359, 78873.6549, 78749.7082, 79226.7010] +25-08-28 12:20:26 | D | - best error = [79009.5391, 78709.7359, 78709.7359, 78709.7359, 78709.7359] +25-08-28 12:20:26 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:20:26 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:20:26 | D | - sum error = [150826.6697, 142271.4530, 133572.8221, 125336.4879, 117942.2329] +25-08-28 12:20:26 | D | - best error = [78709.7359, 78709.7359, 78709.7359, 78709.7359, 78709.7359] +25-08-28 12:20:26 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:20:26 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:20:26 | D | - sum error = [111728.6725, 106113.0181, 100860.4060, 96489.5571, 92734.1118] +25-08-28 12:20:26 | D | - best error = [78709.7359, 78709.7359, 78709.7359, 78709.7359, 78709.7359] +25-08-28 12:20:26 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:20:26 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:20:26 | D | - sum error = [89250.6532, 86615.1780, 84421.9740, 82832.1561, 81518.3466] +25-08-28 12:20:26 | D | - best error = [78709.7359, 78709.7359, 78709.7359, 78709.7359, 78709.7359] +25-08-28 12:20:26 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:20:26 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:20:26 | D | - sum error = [80215.0709, 79551.9453, 79129.0058, 79225.4537] +25-08-28 12:20:26 | D | - best error = [78709.7359, 78709.7359, 78709.7359, 78709.7359] +25-08-28 12:20:26 | D | + error = 78709.7359 +25-08-28 12:20:26 | D | + scale = [min=0.3943, max=10.3214] +25-08-28 12:20:26 | D | - single_transformer_blocks.25.attn.out_proj +25-08-28 12:20:26 | D | + w: sint4 +25-08-28 12:20:26 | D | + x: sint4 +25-08-28 12:20:26 | D | + y: None +25-08-28 12:20:26 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:20:26 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:20:27 | D | + x - AbsMax +25-08-28 12:20:27 | D | + x = [min=1.0156, max=7.8125] +25-08-28 12:20:27 | D | + w - AbsMax +25-08-28 12:20:27 | D | + w = [min=0.1094, max=0.4648] +25-08-28 12:20:27 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:20:28 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:21:31 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:21:31 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:21:31 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:21:31 | D | - sum error = [ 6535.5759, 6513.6831, 6512.2752, 6503.9176, 6496.4418] +25-08-28 12:21:31 | D | - best error = [ 6535.5759, 6513.6831, 6512.2752, 6503.9176, 6496.4418] +25-08-28 12:21:31 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:21:31 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:21:31 | D | - sum error = [ 6484.1332, 6491.9387, 6484.9324, 6469.8934, 6480.9555] +25-08-28 12:21:31 | D | - best error = [ 6484.1332, 6484.1332, 6484.1332, 6469.8934, 6469.8934] +25-08-28 12:21:31 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:21:31 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:21:31 | D | - sum error = [ 6483.4300, 6498.4983, 6506.7983, 6525.9553, 6550.2687] +25-08-28 12:21:31 | D | - best error = [ 6469.8934, 6469.8934, 6469.8934, 6469.8934, 6469.8934] +25-08-28 12:21:31 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:21:31 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:21:31 | D | - sum error = [ 6571.5036, 6578.0246, 6599.1375, 6629.6268, 6664.4766] +25-08-28 12:21:31 | D | - best error = [ 6469.8934, 6469.8934, 6469.8934, 6469.8934, 6469.8934] +25-08-28 12:21:31 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:21:31 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:21:31 | D | - sum error = [ 7284.4643, 7186.1294, 7105.1950, 7028.9003, 6946.3294] +25-08-28 12:21:31 | D | - best error = [ 6469.8934, 6469.8934, 6469.8934, 6469.8934, 6469.8934] +25-08-28 12:21:31 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:21:31 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:21:31 | D | - sum error = [ 6883.4558, 6823.8124, 6773.2745, 6728.0208, 6693.0304] +25-08-28 12:21:31 | D | - best error = [ 6469.8934, 6469.8934, 6469.8934, 6469.8934, 6469.8934] +25-08-28 12:21:31 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:21:31 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:21:31 | D | - sum error = [ 6669.9349, 6637.5492, 6628.3081, 6614.9984, 6609.8451] +25-08-28 12:21:31 | D | - best error = [ 6469.8934, 6469.8934, 6469.8934, 6469.8934, 6469.8934] +25-08-28 12:21:31 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:21:31 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:21:31 | D | - sum error = [ 6619.0637, 6618.3732, 6641.3158, 6670.0411] +25-08-28 12:21:31 | D | - best error = [ 6469.8934, 6469.8934, 6469.8934, 6469.8934] +25-08-28 12:21:31 | D | + error = 6469.8934 +25-08-28 12:21:31 | D | + scale = [min=1.0062, max=2.2757] +25-08-28 12:21:31 | D | - single_transformer_blocks.25.down_proj +25-08-28 12:21:31 | D | + w: sint4 +25-08-28 12:21:31 | D | + x: uint4 +25-08-28 12:21:31 | D | + y: None +25-08-28 12:21:31 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:21:31 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:21:34 | D | + x - AbsMax +25-08-28 12:21:34 | D | + x = [min=0.9258, max=12.8750] +25-08-28 12:21:34 | D | + w - AbsMax +25-08-28 12:21:34 | D | + w = [min=0.1108, max=1.3359] +25-08-28 12:21:34 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:21:37 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:23:39 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:23:39 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:23:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:23:39 | D | - sum error = [ 7232.8846, 7226.4971, 7265.4789, 7252.7363, 7271.2799] +25-08-28 12:23:39 | D | - best error = [ 7232.8846, 7226.4971, 7226.4971, 7226.4971, 7226.4971] +25-08-28 12:23:39 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:23:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:23:39 | D | - sum error = [ 7328.8343, 7371.4183, 7383.5179, 7418.9182, 7465.7029] +25-08-28 12:23:39 | D | - best error = [ 7226.4971, 7226.4971, 7226.4971, 7226.4971, 7226.4971] +25-08-28 12:23:39 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:23:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:23:39 | D | - sum error = [ 7511.0827, 7588.4599, 7688.3970, 7814.2309, 7917.1995] +25-08-28 12:23:39 | D | - best error = [ 7226.4971, 7226.4971, 7226.4971, 7226.4971, 7226.4971] +25-08-28 12:23:39 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:23:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:23:39 | D | - sum error = [ 8043.8722, 8159.2674, 8318.9446, 8457.2421, 8631.2270] +25-08-28 12:23:39 | D | - best error = [ 7226.4971, 7226.4971, 7226.4971, 7226.4971, 7226.4971] +25-08-28 12:23:39 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:23:39 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:23:39 | D | - sum error = [ 8385.6640, 8273.4709, 8106.6842, 7985.8405, 7885.1610] +25-08-28 12:23:39 | D | - best error = [ 7226.4971, 7226.4971, 7226.4971, 7226.4971, 7226.4971] +25-08-28 12:23:39 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:23:39 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:23:39 | D | - sum error = [ 7782.6533, 7725.1242, 7698.1930, 7690.6036, 7683.4331] +25-08-28 12:23:39 | D | - best error = [ 7226.4971, 7226.4971, 7226.4971, 7226.4971, 7226.4971] +25-08-28 12:23:39 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:23:39 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:23:39 | D | - sum error = [ 7649.7260, 7710.5598, 7765.3945, 7833.7541, 7902.3566] +25-08-28 12:23:39 | D | - best error = [ 7226.4971, 7226.4971, 7226.4971, 7226.4971, 7226.4971] +25-08-28 12:23:39 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:23:39 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:23:39 | D | - sum error = [ 8033.2962, 8143.1522, 8372.2575, 8553.4391] +25-08-28 12:23:39 | D | - best error = [ 7226.4971, 7226.4971, 7226.4971, 7226.4971] +25-08-28 12:23:39 | D | + error = 7226.4971 +25-08-28 12:23:39 | D | + scale = [min=0.9962, max=1.1363] +25-08-28 12:24:00 | D | - Smoothing Diffusion Block single_transformer_blocks.26 +25-08-28 12:24:00 | D | - Skipping Module single_transformer_blocks.26.norm.linear +25-08-28 12:24:00 | D | - Smoothing Transformer Block single_transformer_blocks.26 +25-08-28 12:24:00 | D | - single_transformer_blocks.26.attn.qkv_proj + single_transformer_blocks.26.up_proj +25-08-28 12:24:00 | D | + w: sint4 +25-08-28 12:24:00 | D | + x: sint4 +25-08-28 12:24:00 | D | + y: None +25-08-28 12:24:00 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:24:00 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:24:01 | D | + x - AbsMax +25-08-28 12:24:01 | D | + x = [min=0.4102, max=20.3750] +25-08-28 12:24:01 | D | + w - AbsMax +25-08-28 12:24:01 | D | + w = [min=0.0713, max=0.9023] +25-08-28 12:24:01 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:24:02 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:26:08 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:26:08 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:26:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:26:08 | D | - sum error = [102758.4128, 99314.9734, 96276.7637, 93699.2950, 91232.6117] +25-08-28 12:26:08 | D | - best error = [102758.4128, 99314.9734, 96276.7637, 93699.2950, 91232.6117] +25-08-28 12:26:08 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:26:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:26:08 | D | - sum error = [88884.6472, 86962.2947, 85050.1367, 83767.4465, 82511.2414] +25-08-28 12:26:08 | D | - best error = [88884.6472, 86962.2947, 85050.1367, 83767.4465, 82511.2414] +25-08-28 12:26:08 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:26:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:26:08 | D | - sum error = [81572.1552, 80678.1402, 79705.8079, 79502.1115, 79235.4797] +25-08-28 12:26:08 | D | - best error = [81572.1552, 80678.1402, 79705.8079, 79502.1115, 79235.4797] +25-08-28 12:26:08 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:26:08 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:26:08 | D | - sum error = [78719.1675, 78813.5899, 79022.9876, 79351.3027, 79203.2323] +25-08-28 12:26:08 | D | - best error = [78719.1675, 78719.1675, 78719.1675, 78719.1675, 78719.1675] +25-08-28 12:26:08 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:26:08 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:26:08 | D | - sum error = [150293.3954, 141813.8625, 133269.1933, 125394.2529, 117903.6515] +25-08-28 12:26:08 | D | - best error = [78719.1675, 78719.1675, 78719.1675, 78719.1675, 78719.1675] +25-08-28 12:26:08 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:26:08 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:26:08 | D | - sum error = [111063.4949, 105504.4707, 100049.7336, 95689.6851, 91944.5537] +25-08-28 12:26:08 | D | - best error = [78719.1675, 78719.1675, 78719.1675, 78719.1675, 78719.1675] +25-08-28 12:26:08 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:26:08 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:26:08 | D | - sum error = [88964.2762, 86262.8838, 84056.9042, 82504.5705, 81200.3137] +25-08-28 12:26:08 | D | - best error = [78719.1675, 78719.1675, 78719.1675, 78719.1675, 78719.1675] +25-08-28 12:26:08 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:26:08 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:26:08 | D | - sum error = [80047.9920, 79763.1309, 79474.8910, 79443.5625] +25-08-28 12:26:08 | D | - best error = [78719.1675, 78719.1675, 78719.1675, 78719.1675] +25-08-28 12:26:08 | D | + error = 78719.1675 +25-08-28 12:26:08 | D | + scale = [min=0.5125, max=9.5901] +25-08-28 12:26:09 | D | - single_transformer_blocks.26.attn.out_proj +25-08-28 12:26:09 | D | + w: sint4 +25-08-28 12:26:09 | D | + x: sint4 +25-08-28 12:26:09 | D | + y: None +25-08-28 12:26:09 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:26:09 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:26:10 | D | + x - AbsMax +25-08-28 12:26:10 | D | + x = [min=0.7852, max=6.5312] +25-08-28 12:26:10 | D | + w - AbsMax +25-08-28 12:26:10 | D | + w = [min=0.1084, max=0.4355] +25-08-28 12:26:10 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:26:11 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:27:09 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:27:09 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:27:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:27:09 | D | - sum error = [ 5475.9286, 5452.6991, 5427.2074, 5395.7626, 5393.1547] +25-08-28 12:27:09 | D | - best error = [ 5475.9286, 5452.6991, 5427.2074, 5395.7626, 5393.1547] +25-08-28 12:27:09 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:27:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:27:09 | D | - sum error = [ 5381.0603, 5366.8313, 5378.2142, 5377.8107, 5369.7609] +25-08-28 12:27:09 | D | - best error = [ 5381.0603, 5366.8313, 5366.8313, 5366.8313, 5366.8313] +25-08-28 12:27:09 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:27:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:27:09 | D | - sum error = [ 5389.0344, 5401.5383, 5422.5606, 5437.2088, 5452.8558] +25-08-28 12:27:09 | D | - best error = [ 5366.8313, 5366.8313, 5366.8313, 5366.8313, 5366.8313] +25-08-28 12:27:09 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:27:09 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:27:09 | D | - sum error = [ 5493.1248, 5542.7551, 5580.9481, 5601.7578, 5646.6415] +25-08-28 12:27:09 | D | - best error = [ 5366.8313, 5366.8313, 5366.8313, 5366.8313, 5366.8313] +25-08-28 12:27:09 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:27:09 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:27:09 | D | - sum error = [ 5958.0940, 5887.9969, 5814.9982, 5766.3319, 5708.2172] +25-08-28 12:27:09 | D | - best error = [ 5366.8313, 5366.8313, 5366.8313, 5366.8313, 5366.8313] +25-08-28 12:27:09 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:27:09 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:27:09 | D | - sum error = [ 5665.7086, 5622.5868, 5594.1802, 5572.5897, 5559.3194] +25-08-28 12:27:09 | D | - best error = [ 5366.8313, 5366.8313, 5366.8313, 5366.8313, 5366.8313] +25-08-28 12:27:09 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:27:09 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:27:09 | D | - sum error = [ 5547.2941, 5544.1425, 5525.1425, 5539.0846, 5558.5126] +25-08-28 12:27:09 | D | - best error = [ 5366.8313, 5366.8313, 5366.8313, 5366.8313, 5366.8313] +25-08-28 12:27:09 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:27:09 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:27:09 | D | - sum error = [ 5564.9685, 5605.7756, 5626.4340, 5654.2059] +25-08-28 12:27:09 | D | - best error = [ 5366.8313, 5366.8313, 5366.8313, 5366.8313] +25-08-28 12:27:09 | D | + error = 5366.8313 +25-08-28 12:27:09 | D | + scale = [min=0.9300, max=1.7559] +25-08-28 12:27:09 | D | - single_transformer_blocks.26.down_proj +25-08-28 12:27:09 | D | + w: sint4 +25-08-28 12:27:09 | D | + x: uint4 +25-08-28 12:27:09 | D | + y: None +25-08-28 12:27:09 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:27:09 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:27:11 | D | + x - AbsMax +25-08-28 12:27:11 | D | + x = [min=1.1953, max=13.0000] +25-08-28 12:27:11 | D | + w - AbsMax +25-08-28 12:27:11 | D | + w = [min=0.1104, max=1.4531] +25-08-28 12:27:11 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:27:13 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:29:01 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:29:01 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:29:01 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:29:01 | D | - sum error = [ 6935.6897, 6984.6784, 7002.7974, 7021.6998, 7069.0469] +25-08-28 12:29:01 | D | - best error = [ 6935.6897, 6935.6897, 6935.6897, 6935.6897, 6935.6897] +25-08-28 12:29:01 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:29:01 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:29:01 | D | - sum error = [ 7112.8267, 7162.3477, 7225.6834, 7284.0824, 7357.6018] +25-08-28 12:29:01 | D | - best error = [ 6935.6897, 6935.6897, 6935.6897, 6935.6897, 6935.6897] +25-08-28 12:29:01 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:29:01 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:29:01 | D | - sum error = [ 7415.4536, 7504.0813, 7576.4092, 7723.9663, 7801.9705] +25-08-28 12:29:01 | D | - best error = [ 6935.6897, 6935.6897, 6935.6897, 6935.6897, 6935.6897] +25-08-28 12:29:01 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:29:01 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:29:01 | D | - sum error = [ 7931.6092, 8065.1222, 8182.8394, 8349.4857, 8468.1209] +25-08-28 12:29:01 | D | - best error = [ 6935.6897, 6935.6897, 6935.6897, 6935.6897, 6935.6897] +25-08-28 12:29:01 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:29:01 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:29:01 | D | - sum error = [ 8949.3209, 8713.8501, 8460.5721, 8210.4105, 8043.9112] +25-08-28 12:29:01 | D | - best error = [ 6935.6897, 6935.6897, 6935.6897, 6935.6897, 6935.6897] +25-08-28 12:29:01 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:29:01 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:29:01 | D | - sum error = [ 7954.8961, 7862.2857, 7784.3481, 7787.0498, 7728.9937] +25-08-28 12:29:01 | D | - best error = [ 6935.6897, 6935.6897, 6935.6897, 6935.6897, 6935.6897] +25-08-28 12:29:01 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:29:01 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:29:01 | D | - sum error = [ 7659.2148, 7692.1342, 7767.9719, 7835.4487, 7959.2050] +25-08-28 12:29:01 | D | - best error = [ 6935.6897, 6935.6897, 6935.6897, 6935.6897, 6935.6897] +25-08-28 12:29:01 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:29:01 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:29:01 | D | - sum error = [ 8091.7150, 8158.7683, 8320.9653, 8459.8520] +25-08-28 12:29:01 | D | - best error = [ 6935.6897, 6935.6897, 6935.6897, 6935.6897] +25-08-28 12:29:01 | D | + error = 6935.6897 +25-08-28 12:29:01 | D | + scale = [min=1.0000, max=1.0000] +25-08-28 12:29:19 | D | - Smoothing Diffusion Block single_transformer_blocks.27 +25-08-28 12:29:19 | D | - Skipping Module single_transformer_blocks.27.norm.linear +25-08-28 12:29:19 | D | - Smoothing Transformer Block single_transformer_blocks.27 +25-08-28 12:29:19 | D | - single_transformer_blocks.27.attn.qkv_proj + single_transformer_blocks.27.up_proj +25-08-28 12:29:19 | D | + w: sint4 +25-08-28 12:29:19 | D | + x: sint4 +25-08-28 12:29:19 | D | + y: None +25-08-28 12:29:19 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:29:19 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:29:19 | D | + x - AbsMax +25-08-28 12:29:19 | D | + x = [min=1.1094, max=18.3750] +25-08-28 12:29:19 | D | + w - AbsMax +25-08-28 12:29:19 | D | + w = [min=0.1079, max=0.7500] +25-08-28 12:29:19 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:29:20 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:31:26 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:31:26 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:31:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:31:26 | D | - sum error = [125209.2817, 121057.1250, 117569.6766, 114484.7619, 111018.5201] +25-08-28 12:31:26 | D | - best error = [125209.2817, 121057.1250, 117569.6766, 114484.7619, 111018.5201] +25-08-28 12:31:26 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:31:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:31:26 | D | - sum error = [108298.6709, 105808.3230, 103950.8019, 101777.7997, 100493.7214] +25-08-28 12:31:26 | D | - best error = [108298.6709, 105808.3230, 103950.8019, 101777.7997, 100493.7214] +25-08-28 12:31:26 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:31:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:31:26 | D | - sum error = [98967.4849, 97587.6976, 97090.8582, 97155.7312, 96842.7965] +25-08-28 12:31:26 | D | - best error = [98967.4849, 97587.6976, 97090.8582, 97090.8582, 96842.7965] +25-08-28 12:31:26 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:31:26 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:31:26 | D | - sum error = [95937.8999, 96460.3098, 97164.8529, 97934.8067, 99105.2244] +25-08-28 12:31:26 | D | - best error = [95937.8999, 95937.8999, 95937.8999, 95937.8999, 95937.8999] +25-08-28 12:31:26 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:31:26 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:31:26 | D | - sum error = [207302.8662, 193638.0472, 180035.0942, 168180.3040, 157089.8415] +25-08-28 12:31:26 | D | - best error = [95937.8999, 95937.8999, 95937.8999, 95937.8999, 95937.8999] +25-08-28 12:31:26 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:31:26 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:31:26 | D | - sum error = [146485.2281, 137625.3744, 130385.4441, 123064.1038, 117395.2700] +25-08-28 12:31:26 | D | - best error = [95937.8999, 95937.8999, 95937.8999, 95937.8999, 95937.8999] +25-08-28 12:31:26 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:31:26 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:31:26 | D | - sum error = [112008.9779, 107781.6056, 104270.6583, 101269.5704, 99124.8025] +25-08-28 12:31:26 | D | - best error = [95937.8999, 95937.8999, 95937.8999, 95937.8999, 95937.8999] +25-08-28 12:31:26 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:31:26 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:31:26 | D | - sum error = [97966.6869, 97423.9041, 97376.3313, 98435.1313] +25-08-28 12:31:26 | D | - best error = [95937.8999, 95937.8999, 95937.8999, 95937.8999] +25-08-28 12:31:26 | D | + error = 95937.8999 +25-08-28 12:31:26 | D | + scale = [min=1.0810, max=8.8750] +25-08-28 12:31:26 | D | - single_transformer_blocks.27.attn.out_proj +25-08-28 12:31:26 | D | + w: sint4 +25-08-28 12:31:26 | D | + x: sint4 +25-08-28 12:31:26 | D | + y: None +25-08-28 12:31:26 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:31:26 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:31:27 | D | + x - AbsMax +25-08-28 12:31:27 | D | + x = [min=0.8594, max=9.5625] +25-08-28 12:31:27 | D | + w - AbsMax +25-08-28 12:31:27 | D | + w = [min=0.1128, max=0.4258] +25-08-28 12:31:27 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:31:28 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:32:27 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:32:27 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:32:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:32:27 | D | - sum error = [ 7805.5785, 7736.0380, 7681.8339, 7617.5340, 7573.8450] +25-08-28 12:32:27 | D | - best error = [ 7805.5785, 7736.0380, 7681.8339, 7617.5340, 7573.8450] +25-08-28 12:32:27 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:32:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:32:27 | D | - sum error = [ 7516.0852, 7484.0243, 7411.2814, 7365.0477, 7307.3716] +25-08-28 12:32:27 | D | - best error = [ 7516.0852, 7484.0243, 7411.2814, 7365.0477, 7307.3716] +25-08-28 12:32:27 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:32:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:32:27 | D | - sum error = [ 7294.8313, 7299.2564, 7279.7190, 7273.5163, 7280.8195] +25-08-28 12:32:27 | D | - best error = [ 7294.8313, 7294.8313, 7279.7190, 7273.5163, 7273.5163] +25-08-28 12:32:27 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:32:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:32:27 | D | - sum error = [ 7266.4198, 7294.1440, 7298.4396, 7329.7954, 7373.4225] +25-08-28 12:32:27 | D | - best error = [ 7266.4198, 7266.4198, 7266.4198, 7266.4198, 7266.4198] +25-08-28 12:32:27 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:32:27 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:32:27 | D | - sum error = [ 8407.7693, 8286.9493, 8152.2120, 8015.3892, 7898.3998] +25-08-28 12:32:27 | D | - best error = [ 7266.4198, 7266.4198, 7266.4198, 7266.4198, 7266.4198] +25-08-28 12:32:27 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:32:27 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:32:27 | D | - sum error = [ 7821.5950, 7688.4448, 7615.1171, 7532.6436, 7488.9353] +25-08-28 12:32:27 | D | - best error = [ 7266.4198, 7266.4198, 7266.4198, 7266.4198, 7266.4198] +25-08-28 12:32:27 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:32:27 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:32:27 | D | - sum error = [ 7417.1787, 7379.6545, 7337.4541, 7325.9042, 7320.6612] +25-08-28 12:32:27 | D | - best error = [ 7266.4198, 7266.4198, 7266.4198, 7266.4198, 7266.4198] +25-08-28 12:32:27 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:32:27 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:32:27 | D | - sum error = [ 7319.4440, 7305.3564, 7341.7055, 7366.3912] +25-08-28 12:32:27 | D | - best error = [ 7266.4198, 7266.4198, 7266.4198, 7266.4198] +25-08-28 12:32:27 | D | + error = 7266.4198 +25-08-28 12:32:27 | D | + scale = [min=0.8926, max=5.4379] +25-08-28 12:32:27 | D | - single_transformer_blocks.27.down_proj +25-08-28 12:32:27 | D | + w: sint4 +25-08-28 12:32:27 | D | + x: uint4 +25-08-28 12:32:27 | D | + y: None +25-08-28 12:32:27 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:32:27 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:32:28 | D | + x - AbsMax +25-08-28 12:32:28 | D | + x = [min=1.2969, max=12.1875] +25-08-28 12:32:28 | D | + w - AbsMax +25-08-28 12:32:28 | D | + w = [min=0.1113, max=0.8086] +25-08-28 12:32:28 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:32:30 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:34:16 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:34:16 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:34:16 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:34:16 | D | - sum error = [ 7445.5383, 7426.3008, 7429.6288, 7464.9000, 7493.1206] +25-08-28 12:34:16 | D | - best error = [ 7445.5383, 7426.3008, 7426.3008, 7426.3008, 7426.3008] +25-08-28 12:34:16 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:34:16 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:34:16 | D | - sum error = [ 7562.7836, 7596.5139, 7678.7546, 7738.9167, 7763.2846] +25-08-28 12:34:16 | D | - best error = [ 7426.3008, 7426.3008, 7426.3008, 7426.3008, 7426.3008] +25-08-28 12:34:16 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:34:16 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:34:16 | D | - sum error = [ 7887.5723, 8007.1358, 8057.8062, 8229.8146, 8339.9765] +25-08-28 12:34:16 | D | - best error = [ 7426.3008, 7426.3008, 7426.3008, 7426.3008, 7426.3008] +25-08-28 12:34:16 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:34:16 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:34:16 | D | - sum error = [ 8508.8058, 8703.1070, 8911.8914, 9087.8882, 9267.5805] +25-08-28 12:34:16 | D | - best error = [ 7426.3008, 7426.3008, 7426.3008, 7426.3008, 7426.3008] +25-08-28 12:34:16 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:34:16 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:34:16 | D | - sum error = [ 8433.7702, 8277.4383, 8166.8332, 8037.9734, 8003.6529] +25-08-28 12:34:16 | D | - best error = [ 7426.3008, 7426.3008, 7426.3008, 7426.3008, 7426.3008] +25-08-28 12:34:16 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:34:16 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:34:16 | D | - sum error = [ 7935.6336, 7917.4084, 7878.0427, 7902.6639, 7902.3257] +25-08-28 12:34:16 | D | - best error = [ 7426.3008, 7426.3008, 7426.3008, 7426.3008, 7426.3008] +25-08-28 12:34:16 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:34:16 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:34:16 | D | - sum error = [ 7912.5735, 7974.9322, 8109.4473, 8225.8388, 8380.8835] +25-08-28 12:34:16 | D | - best error = [ 7426.3008, 7426.3008, 7426.3008, 7426.3008, 7426.3008] +25-08-28 12:34:16 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:34:16 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:34:16 | D | - sum error = [ 8529.4389, 8702.8331, 8888.7007, 9155.1293] +25-08-28 12:34:16 | D | - best error = [ 7426.3008, 7426.3008, 7426.3008, 7426.3008] +25-08-28 12:34:16 | D | + error = 7426.3008 +25-08-28 12:34:16 | D | + scale = [min=1.0131, max=1.1332] +25-08-28 12:34:34 | D | - Smoothing Diffusion Block single_transformer_blocks.28 +25-08-28 12:34:34 | D | - Skipping Module single_transformer_blocks.28.norm.linear +25-08-28 12:34:34 | D | - Smoothing Transformer Block single_transformer_blocks.28 +25-08-28 12:34:34 | D | - single_transformer_blocks.28.attn.qkv_proj + single_transformer_blocks.28.up_proj +25-08-28 12:34:34 | D | + w: sint4 +25-08-28 12:34:34 | D | + x: sint4 +25-08-28 12:34:34 | D | + y: None +25-08-28 12:34:34 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:34:34 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:34:34 | D | + x - AbsMax +25-08-28 12:34:34 | D | + x = [min=0.4766, max=31.8750] +25-08-28 12:34:34 | D | + w - AbsMax +25-08-28 12:34:34 | D | + w = [min=0.1553, max=1.8750] +25-08-28 12:34:34 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:34:35 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:36:41 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:36:41 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:36:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:36:41 | D | - sum error = [127159.4119, 122721.3267, 118382.1435, 114573.7020, 111144.6746] +25-08-28 12:36:41 | D | - best error = [127159.4119, 122721.3267, 118382.1435, 114573.7020, 111144.6746] +25-08-28 12:36:41 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:36:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:36:41 | D | - sum error = [108430.6342, 106119.5606, 104085.6952, 101953.4413, 100902.0994] +25-08-28 12:36:41 | D | - best error = [108430.6342, 106119.5606, 104085.6952, 101953.4413, 100902.0994] +25-08-28 12:36:41 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:36:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:36:41 | D | - sum error = [99883.9801, 98740.9145, 98432.5410, 97814.8978, 98109.1663] +25-08-28 12:36:41 | D | - best error = [99883.9801, 98740.9145, 98432.5410, 97814.8978, 97814.8978] +25-08-28 12:36:41 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:36:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:36:41 | D | - sum error = [98000.7667, 98229.8992, 99394.3387, 100250.3465, 100952.3471] +25-08-28 12:36:41 | D | - best error = [97814.8978, 97814.8978, 97814.8978, 97814.8978, 97814.8978] +25-08-28 12:36:41 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:36:41 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:36:41 | D | - sum error = [233158.3507, 217802.5316, 204616.0474, 191006.5177, 179417.3824] +25-08-28 12:36:41 | D | - best error = [97814.8978, 97814.8978, 97814.8978, 97814.8978, 97814.8978] +25-08-28 12:36:41 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:36:41 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:36:41 | D | - sum error = [167784.4499, 156927.6672, 147110.4006, 137647.2031, 130255.0556] +25-08-28 12:36:41 | D | - best error = [97814.8978, 97814.8978, 97814.8978, 97814.8978, 97814.8978] +25-08-28 12:36:41 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:36:41 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:36:41 | D | - sum error = [123208.6937, 117071.1636, 111778.5926, 108308.5512, 105188.7021] +25-08-28 12:36:41 | D | - best error = [97814.8978, 97814.8978, 97814.8978, 97814.8978, 97814.8978] +25-08-28 12:36:41 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:36:41 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:36:41 | D | - sum error = [103609.1412, 102085.8230, 101549.4526, 101805.5546] +25-08-28 12:36:41 | D | - best error = [97814.8978, 97814.8978, 97814.8978, 97814.8978] +25-08-28 12:36:41 | D | + error = 97814.8978 +25-08-28 12:36:41 | D | + scale = [min=0.6177, max=9.4895] +25-08-28 12:36:41 | D | - single_transformer_blocks.28.attn.out_proj +25-08-28 12:36:41 | D | + w: sint4 +25-08-28 12:36:41 | D | + x: sint4 +25-08-28 12:36:41 | D | + y: None +25-08-28 12:36:41 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:36:41 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:36:41 | D | + x - AbsMax +25-08-28 12:36:41 | D | + x = [min=0.7500, max=6.7812] +25-08-28 12:36:41 | D | + w - AbsMax +25-08-28 12:36:41 | D | + w = [min=0.1074, max=0.4863] +25-08-28 12:36:41 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:36:42 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:37:39 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:37:39 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:37:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:37:39 | D | - sum error = [ 5669.1327, 5658.6705, 5645.7212, 5631.8217, 5625.5058] +25-08-28 12:37:39 | D | - best error = [ 5669.1327, 5658.6705, 5645.7212, 5631.8217, 5625.5058] +25-08-28 12:37:39 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:37:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:37:39 | D | - sum error = [ 5622.9857, 5615.4948, 5618.1887, 5620.8862, 5614.8786] +25-08-28 12:37:39 | D | - best error = [ 5622.9857, 5615.4948, 5615.4948, 5615.4948, 5614.8786] +25-08-28 12:37:39 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:37:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:37:39 | D | - sum error = [ 5627.9721, 5631.1397, 5645.8096, 5652.9127, 5668.1835] +25-08-28 12:37:39 | D | - best error = [ 5614.8786, 5614.8786, 5614.8786, 5614.8786, 5614.8786] +25-08-28 12:37:39 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:37:39 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:37:39 | D | - sum error = [ 5694.2945, 5715.2865, 5740.6757, 5763.7179, 5800.3872] +25-08-28 12:37:39 | D | - best error = [ 5614.8786, 5614.8786, 5614.8786, 5614.8786, 5614.8786] +25-08-28 12:37:39 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:37:39 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:37:39 | D | - sum error = [ 6248.9865, 6172.3590, 6103.2116, 6031.1160, 5971.8881] +25-08-28 12:37:39 | D | - best error = [ 5614.8786, 5614.8786, 5614.8786, 5614.8786, 5614.8786] +25-08-28 12:37:39 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:37:39 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:37:39 | D | - sum error = [ 5927.2118, 5872.6001, 5833.5883, 5800.7917, 5781.6767] +25-08-28 12:37:39 | D | - best error = [ 5614.8786, 5614.8786, 5614.8786, 5614.8786, 5614.8786] +25-08-28 12:37:39 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:37:39 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:37:39 | D | - sum error = [ 5752.2498, 5732.2694, 5721.6386, 5720.6931, 5722.7988] +25-08-28 12:37:39 | D | - best error = [ 5614.8786, 5614.8786, 5614.8786, 5614.8786, 5614.8786] +25-08-28 12:37:39 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:37:39 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:37:39 | D | - sum error = [ 5740.2169, 5752.1634, 5762.0378, 5802.2558] +25-08-28 12:37:39 | D | - best error = [ 5614.8786, 5614.8786, 5614.8786, 5614.8786] +25-08-28 12:37:39 | D | + error = 5614.8786 +25-08-28 12:37:39 | D | + scale = [min=0.8786, max=2.3664] +25-08-28 12:37:39 | D | - single_transformer_blocks.28.down_proj +25-08-28 12:37:39 | D | + w: sint4 +25-08-28 12:37:39 | D | + x: uint4 +25-08-28 12:37:39 | D | + y: None +25-08-28 12:37:39 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:37:39 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:37:42 | D | + x - AbsMax +25-08-28 12:37:42 | D | + x = [min=0.5781, max=14.7500] +25-08-28 12:37:42 | D | + w - AbsMax +25-08-28 12:37:42 | D | + w = [min=0.1055, max=0.8867] +25-08-28 12:37:42 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:37:43 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:39:34 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:39:34 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:39:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:39:34 | D | - sum error = [ 6902.9414, 6878.2329, 6862.1224, 6863.7459, 6879.9198] +25-08-28 12:39:34 | D | - best error = [ 6902.9414, 6878.2329, 6862.1224, 6862.1224, 6862.1224] +25-08-28 12:39:34 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:39:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:39:34 | D | - sum error = [ 6874.9572, 6863.9417, 6896.9628, 6959.6644, 7034.8188] +25-08-28 12:39:34 | D | - best error = [ 6862.1224, 6862.1224, 6862.1224, 6862.1224, 6862.1224] +25-08-28 12:39:34 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:39:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:39:34 | D | - sum error = [ 7095.0687, 7149.4291, 7207.5497, 7263.7541, 7380.1610] +25-08-28 12:39:34 | D | - best error = [ 6862.1224, 6862.1224, 6862.1224, 6862.1224, 6862.1224] +25-08-28 12:39:34 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:39:34 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:39:34 | D | - sum error = [ 7512.8437, 7644.7307, 7765.1787, 7872.9202, 8046.4124] +25-08-28 12:39:34 | D | - best error = [ 6862.1224, 6862.1224, 6862.1224, 6862.1224, 6862.1224] +25-08-28 12:39:34 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:39:34 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:39:34 | D | - sum error = [ 7474.8833, 7328.8236, 7201.2263, 7122.7781, 7070.8992] +25-08-28 12:39:34 | D | - best error = [ 6862.1224, 6862.1224, 6862.1224, 6862.1224, 6862.1224] +25-08-28 12:39:34 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:39:34 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:39:34 | D | - sum error = [ 6990.1780, 6944.1933, 6889.8206, 6882.4824, 6918.0688] +25-08-28 12:39:34 | D | - best error = [ 6862.1224, 6862.1224, 6862.1224, 6862.1224, 6862.1224] +25-08-28 12:39:34 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:39:34 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:39:34 | D | - sum error = [ 6991.7437, 7048.7888, 7140.0367, 7204.6760, 7325.1113] +25-08-28 12:39:34 | D | - best error = [ 6862.1224, 6862.1224, 6862.1224, 6862.1224, 6862.1224] +25-08-28 12:39:34 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:39:34 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:39:34 | D | - sum error = [ 7439.9207, 7600.2058, 7827.2488, 7993.5396] +25-08-28 12:39:34 | D | - best error = [ 6862.1224, 6862.1224, 6862.1224, 6862.1224] +25-08-28 12:39:34 | D | + error = 6862.1224 +25-08-28 12:39:34 | D | + scale = [min=0.9467, max=1.3088] +25-08-28 12:39:52 | D | - Smoothing Diffusion Block single_transformer_blocks.29 +25-08-28 12:39:52 | D | - Skipping Module single_transformer_blocks.29.norm.linear +25-08-28 12:39:52 | D | - Smoothing Transformer Block single_transformer_blocks.29 +25-08-28 12:39:52 | D | - single_transformer_blocks.29.attn.qkv_proj + single_transformer_blocks.29.up_proj +25-08-28 12:39:52 | D | + w: sint4 +25-08-28 12:39:52 | D | + x: sint4 +25-08-28 12:39:52 | D | + y: None +25-08-28 12:39:52 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:39:52 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:39:53 | D | + x - AbsMax +25-08-28 12:39:53 | D | + x = [min=0.4297, max=25.1250] +25-08-28 12:39:53 | D | + w - AbsMax +25-08-28 12:39:53 | D | + w = [min=0.1055, max=0.6914] +25-08-28 12:39:53 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:39:54 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:41:59 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:41:59 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:41:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:41:59 | D | - sum error = [126124.2765, 121385.3474, 116426.5372, 112241.7596, 108173.3578] +25-08-28 12:41:59 | D | - best error = [126124.2765, 121385.3474, 116426.5372, 112241.7596, 108173.3578] +25-08-28 12:41:59 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:41:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:41:59 | D | - sum error = [104722.8443, 102064.2195, 99574.9045, 97238.5807, 95637.6629] +25-08-28 12:41:59 | D | - best error = [104722.8443, 102064.2195, 99574.9045, 97238.5807, 95637.6629] +25-08-28 12:41:59 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:41:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:41:59 | D | - sum error = [94441.5204, 93106.2753, 92843.7028, 92633.4809, 92406.6643] +25-08-28 12:41:59 | D | - best error = [94441.5204, 93106.2753, 92843.7028, 92633.4809, 92406.6643] +25-08-28 12:41:59 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:41:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:41:59 | D | - sum error = [92709.0147, 93067.5536, 94022.2952, 95447.1226, 96514.8806] +25-08-28 12:41:59 | D | - best error = [92406.6643, 92406.6643, 92406.6643, 92406.6643, 92406.6643] +25-08-28 12:41:59 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:41:59 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:41:59 | D | - sum error = [182108.6459, 169824.4492, 158921.7234, 148806.7783, 139590.9984] +25-08-28 12:41:59 | D | - best error = [92406.6643, 92406.6643, 92406.6643, 92406.6643, 92406.6643] +25-08-28 12:41:59 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:41:59 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:41:59 | D | - sum error = [130903.8022, 123197.0784, 116344.2286, 110620.5565, 105920.0145] +25-08-28 12:41:59 | D | - best error = [92406.6643, 92406.6643, 92406.6643, 92406.6643, 92406.6643] +25-08-28 12:41:59 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:41:59 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:41:59 | D | - sum error = [101714.7321, 98467.9570, 96295.7099, 94657.3144, 93775.4363] +25-08-28 12:41:59 | D | - best error = [92406.6643, 92406.6643, 92406.6643, 92406.6643, 92406.6643] +25-08-28 12:41:59 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:41:59 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:41:59 | D | - sum error = [93611.3279, 93787.4636, 94672.7287, 96105.2596] +25-08-28 12:41:59 | D | - best error = [92406.6643, 92406.6643, 92406.6643, 92406.6643] +25-08-28 12:41:59 | D | + error = 92406.6643 +25-08-28 12:41:59 | D | + scale = [min=0.5536, max=9.5516] +25-08-28 12:42:00 | D | - single_transformer_blocks.29.attn.out_proj +25-08-28 12:42:00 | D | + w: sint4 +25-08-28 12:42:00 | D | + x: sint4 +25-08-28 12:42:00 | D | + y: None +25-08-28 12:42:00 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:42:00 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:42:00 | D | + x - AbsMax +25-08-28 12:42:00 | D | + x = [min=0.4824, max=9.0625] +25-08-28 12:42:00 | D | + w - AbsMax +25-08-28 12:42:00 | D | + w = [min=0.0674, max=0.4180] +25-08-28 12:42:00 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:42:01 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:42:59 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:42:59 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:42:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:42:59 | D | - sum error = [ 6568.4199, 6457.9582, 6378.6999, 6296.0057, 6194.6642] +25-08-28 12:42:59 | D | - best error = [ 6568.4199, 6457.9582, 6378.6999, 6296.0057, 6194.6642] +25-08-28 12:42:59 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:42:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:42:59 | D | - sum error = [ 6146.1949, 6092.8174, 6049.4509, 6016.7579, 5979.3444] +25-08-28 12:42:59 | D | - best error = [ 6146.1949, 6092.8174, 6049.4509, 6016.7579, 5979.3444] +25-08-28 12:42:59 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:42:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:42:59 | D | - sum error = [ 5962.2046, 5936.9965, 5943.8799, 5903.6230, 5925.7043] +25-08-28 12:42:59 | D | - best error = [ 5962.2046, 5936.9965, 5936.9965, 5903.6230, 5903.6230] +25-08-28 12:42:59 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:42:59 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:42:59 | D | - sum error = [ 5951.9664, 5958.9294, 5990.6384, 6039.0748, 6094.3344] +25-08-28 12:42:59 | D | - best error = [ 5903.6230, 5903.6230, 5903.6230, 5903.6230, 5903.6230] +25-08-28 12:42:59 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:42:59 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:42:59 | D | - sum error = [ 6832.5802, 6662.7166, 6536.4718, 6426.5842, 6297.3626] +25-08-28 12:42:59 | D | - best error = [ 5903.6230, 5903.6230, 5903.6230, 5903.6230, 5903.6230] +25-08-28 12:42:59 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:42:59 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:42:59 | D | - sum error = [ 6231.2612, 6141.0592, 6078.4570, 6034.1733, 6007.4793] +25-08-28 12:42:59 | D | - best error = [ 5903.6230, 5903.6230, 5903.6230, 5903.6230, 5903.6230] +25-08-28 12:42:59 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:42:59 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:42:59 | D | - sum error = [ 5957.5609, 5941.2386, 5934.1857, 5926.4431, 5942.8973] +25-08-28 12:42:59 | D | - best error = [ 5903.6230, 5903.6230, 5903.6230, 5903.6230, 5903.6230] +25-08-28 12:42:59 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:42:59 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:42:59 | D | - sum error = [ 5967.9296, 5984.5972, 6012.6876, 6078.2129] +25-08-28 12:42:59 | D | - best error = [ 5903.6230, 5903.6230, 5903.6230, 5903.6230] +25-08-28 12:42:59 | D | + error = 5903.6230 +25-08-28 12:42:59 | D | + scale = [min=0.6226, max=4.1900] +25-08-28 12:42:59 | D | - single_transformer_blocks.29.down_proj +25-08-28 12:42:59 | D | + w: sint4 +25-08-28 12:42:59 | D | + x: uint4 +25-08-28 12:42:59 | D | + y: None +25-08-28 12:42:59 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:42:59 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:43:02 | D | + x - AbsMax +25-08-28 12:43:02 | D | + x = [min=1.0938, max=11.2500] +25-08-28 12:43:02 | D | + w - AbsMax +25-08-28 12:43:02 | D | + w = [min=0.1118, max=1.1094] +25-08-28 12:43:02 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:43:04 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:45:14 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:45:14 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:45:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:45:14 | D | - sum error = [ 7466.8890, 7409.9830, 7400.5857, 7379.3287, 7397.9486] +25-08-28 12:45:14 | D | - best error = [ 7466.8890, 7409.9830, 7400.5857, 7379.3287, 7379.3287] +25-08-28 12:45:14 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:45:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:45:14 | D | - sum error = [ 7408.9667, 7490.3853, 7543.8865, 7612.1077, 7675.7908] +25-08-28 12:45:14 | D | - best error = [ 7379.3287, 7379.3287, 7379.3287, 7379.3287, 7379.3287] +25-08-28 12:45:14 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:45:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:45:14 | D | - sum error = [ 7684.8951, 7782.8026, 7874.9710, 7986.8377, 8131.1628] +25-08-28 12:45:14 | D | - best error = [ 7379.3287, 7379.3287, 7379.3287, 7379.3287, 7379.3287] +25-08-28 12:45:14 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:45:14 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:45:14 | D | - sum error = [ 8194.7647, 8320.6139, 8441.3089, 8585.3289, 8745.8562] +25-08-28 12:45:14 | D | - best error = [ 7379.3287, 7379.3287, 7379.3287, 7379.3287, 7379.3287] +25-08-28 12:45:14 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:45:14 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:45:14 | D | - sum error = [ 8055.0911, 7905.2825, 7768.9863, 7648.1213, 7576.5404] +25-08-28 12:45:14 | D | - best error = [ 7379.3287, 7379.3287, 7379.3287, 7379.3287, 7379.3287] +25-08-28 12:45:14 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:45:14 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:45:14 | D | - sum error = [ 7552.7815, 7518.9433, 7465.9786, 7470.2311, 7461.9397] +25-08-28 12:45:14 | D | - best error = [ 7379.3287, 7379.3287, 7379.3287, 7379.3287, 7379.3287] +25-08-28 12:45:14 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:45:14 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:45:14 | D | - sum error = [ 7557.0963, 7621.3595, 7711.3885, 7790.9940, 7938.0345] +25-08-28 12:45:14 | D | - best error = [ 7379.3287, 7379.3287, 7379.3287, 7379.3287, 7379.3287] +25-08-28 12:45:14 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:45:14 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:45:14 | D | - sum error = [ 8098.0467, 8286.3092, 8455.5842, 8627.5137] +25-08-28 12:45:14 | D | - best error = [ 7379.3287, 7379.3287, 7379.3287, 7379.3287] +25-08-28 12:45:14 | D | + error = 7379.3287 +25-08-28 12:45:14 | D | + scale = [min=1.0135, max=1.4377] +25-08-28 12:45:33 | D | - Smoothing Diffusion Block single_transformer_blocks.30 +25-08-28 12:45:33 | D | - Skipping Module single_transformer_blocks.30.norm.linear +25-08-28 12:45:33 | D | - Smoothing Transformer Block single_transformer_blocks.30 +25-08-28 12:45:33 | D | - single_transformer_blocks.30.attn.qkv_proj + single_transformer_blocks.30.up_proj +25-08-28 12:45:33 | D | + w: sint4 +25-08-28 12:45:33 | D | + x: sint4 +25-08-28 12:45:33 | D | + y: None +25-08-28 12:45:33 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:45:33 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 12:45:33 | D | + x - AbsMax +25-08-28 12:45:33 | D | + x = [min=0.7461, max=24.2500] +25-08-28 12:45:33 | D | + w - AbsMax +25-08-28 12:45:33 | D | + w = [min=0.0928, max=1.5859] +25-08-28 12:45:33 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 12:45:35 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 12:47:41 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:47:41 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:47:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:47:41 | D | - sum error = [160305.8836, 154695.9104, 149058.2396, 144477.5654, 140456.3588] +25-08-28 12:47:41 | D | - best error = [160305.8836, 154695.9104, 149058.2396, 144477.5654, 140456.3588] +25-08-28 12:47:41 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:47:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:47:41 | D | - sum error = [136788.8950, 132968.7217, 129324.6576, 126779.6021, 124700.8919] +25-08-28 12:47:41 | D | - best error = [136788.8950, 132968.7217, 129324.6576, 126779.6021, 124700.8919] +25-08-28 12:47:41 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:47:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:47:41 | D | - sum error = [123306.6884, 122112.2558, 120908.7341, 121086.1987, 120483.8841] +25-08-28 12:47:41 | D | - best error = [123306.6884, 122112.2558, 120908.7341, 120908.7341, 120483.8841] +25-08-28 12:47:41 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:47:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:47:41 | D | - sum error = [120471.8616, 120294.8186, 121172.0681, 121868.7640, 122998.4471] +25-08-28 12:47:41 | D | - best error = [120471.8616, 120294.8186, 120294.8186, 120294.8186, 120294.8186] +25-08-28 12:47:41 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:47:41 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:47:41 | D | - sum error = [251626.0174, 237312.6386, 223900.1286, 211603.4353, 199669.7126] +25-08-28 12:47:41 | D | - best error = [120294.8186, 120294.8186, 120294.8186, 120294.8186, 120294.8186] +25-08-28 12:47:41 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:47:41 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:47:41 | D | - sum error = [187699.6927, 176878.5172, 167378.1222, 157753.8076, 151045.1321] +25-08-28 12:47:41 | D | - best error = [120294.8186, 120294.8186, 120294.8186, 120294.8186, 120294.8186] +25-08-28 12:47:41 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:47:41 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:47:41 | D | - sum error = [144153.9515, 137776.7795, 133007.7527, 129014.0909, 126716.4848] +25-08-28 12:47:41 | D | - best error = [120294.8186, 120294.8186, 120294.8186, 120294.8186, 120294.8186] +25-08-28 12:47:41 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:47:41 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:47:41 | D | - sum error = [124695.5583, 123310.7201, 122901.4856, 123897.7767] +25-08-28 12:47:41 | D | - best error = [120294.8186, 120294.8186, 120294.8186, 120294.8186] +25-08-28 12:47:41 | D | + error = 120294.8186 +25-08-28 12:47:41 | D | + scale = [min=0.7911, max=12.8165] +25-08-28 12:47:41 | D | - single_transformer_blocks.30.attn.out_proj +25-08-28 12:47:41 | D | + w: sint4 +25-08-28 12:47:41 | D | + x: sint4 +25-08-28 12:47:41 | D | + y: None +25-08-28 12:47:41 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:47:41 | D | + finished parsing calibration arguments, ram usage: 17.7 +25-08-28 12:47:42 | D | + x - AbsMax +25-08-28 12:47:42 | D | + x = [min=0.7422, max=9.9375] +25-08-28 12:47:42 | D | + w - AbsMax +25-08-28 12:47:42 | D | + w = [min=0.1030, max=0.3574] +25-08-28 12:47:42 | D | + finished resetting calibrator, ram usage: 17.8 +25-08-28 12:47:43 | D | + finished calculating the original outputs, ram usage: 17.8 +25-08-28 12:48:42 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:48:42 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:48:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:48:42 | D | - sum error = [ 7092.2273, 7049.2374, 7008.2078, 6990.2074, 6960.8376] +25-08-28 12:48:42 | D | - best error = [ 7092.2273, 7049.2374, 7008.2078, 6990.2074, 6960.8376] +25-08-28 12:48:42 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:48:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:48:42 | D | - sum error = [ 6913.8403, 6888.1701, 6852.0788, 6823.7787, 6818.7374] +25-08-28 12:48:42 | D | - best error = [ 6913.8403, 6888.1701, 6852.0788, 6823.7787, 6818.7374] +25-08-28 12:48:42 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:48:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:48:42 | D | - sum error = [ 6824.4235, 6810.4208, 6802.2693, 6780.8601, 6802.4337] +25-08-28 12:48:42 | D | - best error = [ 6818.7374, 6810.4208, 6802.2693, 6780.8601, 6780.8601] +25-08-28 12:48:42 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:48:42 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:48:42 | D | - sum error = [ 6822.7458, 6847.2196, 6891.3813, 6909.6043, 6956.1285] +25-08-28 12:48:42 | D | - best error = [ 6780.8601, 6780.8601, 6780.8601, 6780.8601, 6780.8601] +25-08-28 12:48:42 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:48:42 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:48:42 | D | - sum error = [ 7574.6593, 7473.9438, 7390.3905, 7309.9187, 7215.2910] +25-08-28 12:48:42 | D | - best error = [ 6780.8601, 6780.8601, 6780.8601, 6780.8601, 6780.8601] +25-08-28 12:48:42 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:48:42 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:48:42 | D | - sum error = [ 7130.1658, 7065.8667, 7005.4410, 6968.2213, 6909.1782] +25-08-28 12:48:42 | D | - best error = [ 6780.8601, 6780.8601, 6780.8601, 6780.8601, 6780.8601] +25-08-28 12:48:42 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:48:42 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:48:42 | D | - sum error = [ 6877.6839, 6848.6921, 6835.1711, 6817.4840, 6820.3460] +25-08-28 12:48:42 | D | - best error = [ 6780.8601, 6780.8601, 6780.8601, 6780.8601, 6780.8601] +25-08-28 12:48:42 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:48:42 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:48:42 | D | - sum error = [ 6868.8278, 6889.2569, 6919.3271, 6954.7405] +25-08-28 12:48:42 | D | - best error = [ 6780.8601, 6780.8601, 6780.8601, 6780.8601] +25-08-28 12:48:42 | D | + error = 6780.8601 +25-08-28 12:48:42 | D | + scale = [min=0.8238, max=4.4487] +25-08-28 12:48:42 | D | - single_transformer_blocks.30.down_proj +25-08-28 12:48:42 | D | + w: sint4 +25-08-28 12:48:42 | D | + x: uint4 +25-08-28 12:48:42 | D | + y: None +25-08-28 12:48:42 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:48:42 | D | + finished parsing calibration arguments, ram usage: 17.8 +25-08-28 12:48:45 | D | + x - AbsMax +25-08-28 12:48:45 | D | + x = [min=0.6953, max=14.6875] +25-08-28 12:48:45 | D | + w - AbsMax +25-08-28 12:48:45 | D | + w = [min=0.1069, max=1.3984] +25-08-28 12:48:45 | D | + finished resetting calibrator, ram usage: 17.8 +25-08-28 12:48:47 | D | + finished calculating the original outputs, ram usage: 17.8 +25-08-28 12:50:44 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:50:44 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:50:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:50:44 | D | - sum error = [ 7646.6599, 7595.4315, 7573.4682, 7566.0024, 7576.2679] +25-08-28 12:50:44 | D | - best error = [ 7646.6599, 7595.4315, 7573.4682, 7566.0024, 7566.0024] +25-08-28 12:50:44 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:50:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:50:44 | D | - sum error = [ 7577.9440, 7604.9205, 7625.2153, 7640.7348, 7713.5693] +25-08-28 12:50:44 | D | - best error = [ 7566.0024, 7566.0024, 7566.0024, 7566.0024, 7566.0024] +25-08-28 12:50:44 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:50:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:50:44 | D | - sum error = [ 7824.3579, 7892.0654, 7998.7291, 8102.2130, 8225.5451] +25-08-28 12:50:44 | D | - best error = [ 7566.0024, 7566.0024, 7566.0024, 7566.0024, 7566.0024] +25-08-28 12:50:44 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:50:44 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:50:44 | D | - sum error = [ 8338.2842, 8470.8928, 8647.3293, 8865.3641, 9032.1081] +25-08-28 12:50:44 | D | - best error = [ 7566.0024, 7566.0024, 7566.0024, 7566.0024, 7566.0024] +25-08-28 12:50:44 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:50:44 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:50:44 | D | - sum error = [ 8518.6454, 8293.1760, 8112.4462, 7917.6082, 7813.7359] +25-08-28 12:50:44 | D | - best error = [ 7566.0024, 7566.0024, 7566.0024, 7566.0024, 7566.0024] +25-08-28 12:50:44 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:50:44 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:50:44 | D | - sum error = [ 7729.8481, 7636.3499, 7563.5827, 7567.5206, 7582.2733] +25-08-28 12:50:44 | D | - best error = [ 7566.0024, 7566.0024, 7563.5827, 7563.5827, 7563.5827] +25-08-28 12:50:44 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:50:44 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:50:44 | D | - sum error = [ 7628.4759, 7696.0712, 7776.0700, 7877.7431, 8030.8540] +25-08-28 12:50:44 | D | - best error = [ 7563.5827, 7563.5827, 7563.5827, 7563.5827, 7563.5827] +25-08-28 12:50:44 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:50:44 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:50:44 | D | - sum error = [ 8198.5574, 8437.7613, 8695.1531, 8951.1161] +25-08-28 12:50:44 | D | - best error = [ 7563.5827, 7563.5827, 7563.5827, 7563.5827] +25-08-28 12:50:44 | D | + error = 7563.5827 +25-08-28 12:50:44 | D | + scale = [min=1.7529, max=8.4121] +25-08-28 12:51:03 | D | - Smoothing Diffusion Block single_transformer_blocks.31 +25-08-28 12:51:03 | D | - Skipping Module single_transformer_blocks.31.norm.linear +25-08-28 12:51:03 | D | - Smoothing Transformer Block single_transformer_blocks.31 +25-08-28 12:51:03 | D | - single_transformer_blocks.31.attn.qkv_proj + single_transformer_blocks.31.up_proj +25-08-28 12:51:03 | D | + w: sint4 +25-08-28 12:51:03 | D | + x: sint4 +25-08-28 12:51:03 | D | + y: None +25-08-28 12:51:03 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:51:03 | D | + finished parsing calibration arguments, ram usage: 17.7 +25-08-28 12:51:04 | D | + x - AbsMax +25-08-28 12:51:04 | D | + x = [min=0.2266, max=29.1250] +25-08-28 12:51:04 | D | + w - AbsMax +25-08-28 12:51:04 | D | + w = [min=0.1011, max=0.7383] +25-08-28 12:51:04 | D | + finished resetting calibrator, ram usage: 17.7 +25-08-28 12:51:05 | D | + finished calculating the original outputs, ram usage: 17.7 +25-08-28 12:53:11 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:53:11 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:53:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:53:11 | D | - sum error = [176183.3983, 168129.2973, 161436.1149, 154553.8649, 148895.4464] +25-08-28 12:53:11 | D | - best error = [176183.3983, 168129.2973, 161436.1149, 154553.8649, 148895.4464] +25-08-28 12:53:11 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:53:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:53:11 | D | - sum error = [143789.1719, 140431.5032, 136131.6708, 133257.4829, 131098.4739] +25-08-28 12:53:11 | D | - best error = [143789.1719, 140431.5032, 136131.6708, 133257.4829, 131098.4739] +25-08-28 12:53:11 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:53:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:53:11 | D | - sum error = [128446.9517, 127782.5473, 126642.3722, 126295.3329, 125183.3170] +25-08-28 12:53:11 | D | - best error = [128446.9517, 127782.5473, 126642.3722, 126295.3329, 125183.3170] +25-08-28 12:53:11 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:53:11 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:53:11 | D | - sum error = [124691.2675, 125720.7185, 125599.5108, 126504.3660, 127892.9152] +25-08-28 12:53:11 | D | - best error = [124691.2675, 124691.2675, 124691.2675, 124691.2675, 124691.2675] +25-08-28 12:53:11 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:53:11 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:53:11 | D | - sum error = [279751.2843, 259289.5450, 239974.0573, 221885.8771, 206481.4277] +25-08-28 12:53:11 | D | - best error = [124691.2675, 124691.2675, 124691.2675, 124691.2675, 124691.2675] +25-08-28 12:53:11 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:53:11 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:53:11 | D | - sum error = [191072.9191, 178664.3824, 168161.7994, 158473.5248, 150766.0838] +25-08-28 12:53:11 | D | - best error = [124691.2675, 124691.2675, 124691.2675, 124691.2675, 124691.2675] +25-08-28 12:53:11 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:53:11 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:53:11 | D | - sum error = [143016.2235, 137911.0605, 133774.9504, 130460.9546, 128339.9047] +25-08-28 12:53:11 | D | - best error = [124691.2675, 124691.2675, 124691.2675, 124691.2675, 124691.2675] +25-08-28 12:53:11 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:53:11 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:53:11 | D | - sum error = [127045.4084, 126006.0087, 125513.8313, 127178.7575] +25-08-28 12:53:11 | D | - best error = [124691.2675, 124691.2675, 124691.2675, 124691.2675] +25-08-28 12:53:11 | D | + error = 124691.2675 +25-08-28 12:53:11 | D | + scale = [min=0.3284, max=12.5372] +25-08-28 12:53:12 | D | - single_transformer_blocks.31.attn.out_proj +25-08-28 12:53:12 | D | + w: sint4 +25-08-28 12:53:12 | D | + x: sint4 +25-08-28 12:53:12 | D | + y: None +25-08-28 12:53:12 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:53:12 | D | + finished parsing calibration arguments, ram usage: 18.7 +25-08-28 12:53:12 | D | + x - AbsMax +25-08-28 12:53:12 | D | + x = [min=0.7422, max=8.9375] +25-08-28 12:53:12 | D | + w - AbsMax +25-08-28 12:53:12 | D | + w = [min=0.0972, max=0.3652] +25-08-28 12:53:12 | D | + finished resetting calibrator, ram usage: 18.7 +25-08-28 12:53:13 | D | + finished calculating the original outputs, ram usage: 18.7 +25-08-28 12:54:10 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:54:10 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:54:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:54:10 | D | - sum error = [ 6431.5575, 6374.3420, 6352.5645, 6319.3750, 6280.2188] +25-08-28 12:54:10 | D | - best error = [ 6431.5575, 6374.3420, 6352.5645, 6319.3750, 6280.2188] +25-08-28 12:54:10 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:54:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:54:10 | D | - sum error = [ 6244.9131, 6226.6402, 6205.3074, 6203.6251, 6187.8112] +25-08-28 12:54:10 | D | - best error = [ 6244.9131, 6226.6402, 6205.3074, 6203.6251, 6187.8112] +25-08-28 12:54:10 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:54:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:54:10 | D | - sum error = [ 6179.0180, 6175.9032, 6201.8001, 6214.5970, 6237.6868] +25-08-28 12:54:10 | D | - best error = [ 6179.0180, 6175.9032, 6175.9032, 6175.9032, 6175.9032] +25-08-28 12:54:10 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:54:10 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:54:10 | D | - sum error = [ 6258.8421, 6306.2363, 6346.9791, 6423.7456, 6468.2628] +25-08-28 12:54:10 | D | - best error = [ 6175.9032, 6175.9032, 6175.9032, 6175.9032, 6175.9032] +25-08-28 12:54:10 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:54:10 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:54:10 | D | - sum error = [ 6739.8605, 6644.3377, 6558.0445, 6491.8725, 6410.9606] +25-08-28 12:54:10 | D | - best error = [ 6175.9032, 6175.9032, 6175.9032, 6175.9032, 6175.9032] +25-08-28 12:54:10 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:54:10 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:54:10 | D | - sum error = [ 6370.0025, 6305.0748, 6269.0443, 6257.4004, 6224.3647] +25-08-28 12:54:10 | D | - best error = [ 6175.9032, 6175.9032, 6175.9032, 6175.9032, 6175.9032] +25-08-28 12:54:10 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:54:10 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:54:10 | D | - sum error = [ 6201.7817, 6196.8715, 6233.6381, 6218.6487, 6262.8508] +25-08-28 12:54:10 | D | - best error = [ 6175.9032, 6175.9032, 6175.9032, 6175.9032, 6175.9032] +25-08-28 12:54:10 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:54:10 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:54:10 | D | - sum error = [ 6299.5940, 6332.8496, 6414.7075, 6454.1416] +25-08-28 12:54:10 | D | - best error = [ 6175.9032, 6175.9032, 6175.9032, 6175.9032] +25-08-28 12:54:10 | D | + error = 6175.9032 +25-08-28 12:54:10 | D | + scale = [min=0.8488, max=3.3356] +25-08-28 12:54:11 | D | - single_transformer_blocks.31.down_proj +25-08-28 12:54:11 | D | + w: sint4 +25-08-28 12:54:11 | D | + x: uint4 +25-08-28 12:54:11 | D | + y: None +25-08-28 12:54:11 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:54:11 | D | + finished parsing calibration arguments, ram usage: 18.7 +25-08-28 12:54:13 | D | + x - AbsMax +25-08-28 12:54:13 | D | + x = [min=0.6445, max=15.3125] +25-08-28 12:54:13 | D | + w - AbsMax +25-08-28 12:54:13 | D | + w = [min=0.1064, max=1.3906] +25-08-28 12:54:13 | D | + finished resetting calibrator, ram usage: 18.7 +25-08-28 12:54:14 | D | + finished calculating the original outputs, ram usage: 18.7 +25-08-28 12:56:02 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:56:02 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:56:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:56:02 | D | - sum error = [ 7146.1682, 7108.8317, 7075.5752, 7056.9058, 7091.7896] +25-08-28 12:56:02 | D | - best error = [ 7146.1682, 7108.8317, 7075.5752, 7056.9058, 7056.9058] +25-08-28 12:56:02 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:56:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:56:02 | D | - sum error = [ 7096.0862, 7101.5497, 7145.4842, 7195.1723, 7232.0044] +25-08-28 12:56:02 | D | - best error = [ 7056.9058, 7056.9058, 7056.9058, 7056.9058, 7056.9058] +25-08-28 12:56:02 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:56:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:56:02 | D | - sum error = [ 7296.1382, 7391.0587, 7479.9877, 7629.5006, 7667.9979] +25-08-28 12:56:02 | D | - best error = [ 7056.9058, 7056.9058, 7056.9058, 7056.9058, 7056.9058] +25-08-28 12:56:02 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:56:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:56:02 | D | - sum error = [ 7751.2918, 7878.0244, 7985.0560, 8134.4645, 8285.4204] +25-08-28 12:56:02 | D | - best error = [ 7056.9058, 7056.9058, 7056.9058, 7056.9058, 7056.9058] +25-08-28 12:56:02 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:56:02 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:56:02 | D | - sum error = [ 7787.4360, 7648.1789, 7516.9292, 7373.0555, 7295.9951] +25-08-28 12:56:02 | D | - best error = [ 7056.9058, 7056.9058, 7056.9058, 7056.9058, 7056.9058] +25-08-28 12:56:02 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:56:02 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:56:02 | D | - sum error = [ 7219.2437, 7134.5806, 7099.2118, 7120.5949, 7090.2767] +25-08-28 12:56:02 | D | - best error = [ 7056.9058, 7056.9058, 7056.9058, 7056.9058, 7056.9058] +25-08-28 12:56:02 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:56:02 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:56:02 | D | - sum error = [ 7137.7492, 7186.9409, 7281.9470, 7399.7276, 7509.6221] +25-08-28 12:56:02 | D | - best error = [ 7056.9058, 7056.9058, 7056.9058, 7056.9058, 7056.9058] +25-08-28 12:56:02 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:56:02 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:56:02 | D | - sum error = [ 7634.2570, 7846.2631, 8046.0112, 8201.1331] +25-08-28 12:56:02 | D | - best error = [ 7056.9058, 7056.9058, 7056.9058, 7056.9058] +25-08-28 12:56:02 | D | + error = 7056.9058 +25-08-28 12:56:02 | D | + scale = [min=0.9362, max=1.5058] +25-08-28 12:56:20 | D | - Smoothing Diffusion Block single_transformer_blocks.32 +25-08-28 12:56:20 | D | - Skipping Module single_transformer_blocks.32.norm.linear +25-08-28 12:56:20 | D | - Smoothing Transformer Block single_transformer_blocks.32 +25-08-28 12:56:20 | D | - single_transformer_blocks.32.attn.qkv_proj + single_transformer_blocks.32.up_proj +25-08-28 12:56:20 | D | + w: sint4 +25-08-28 12:56:20 | D | + x: sint4 +25-08-28 12:56:20 | D | + y: None +25-08-28 12:56:20 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:56:20 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 12:56:21 | D | + x - AbsMax +25-08-28 12:56:21 | D | + x = [min=0.5352, max=23.5000] +25-08-28 12:56:21 | D | + w - AbsMax +25-08-28 12:56:21 | D | + w = [min=0.1562, max=1.4219] +25-08-28 12:56:21 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 12:56:22 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 12:58:28 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:58:28 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:58:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:58:28 | D | - sum error = [230661.6292, 220694.4397, 210858.4972, 201803.6406, 194092.6387] +25-08-28 12:58:28 | D | - best error = [230661.6292, 220694.4397, 210858.4972, 201803.6406, 194092.6387] +25-08-28 12:58:28 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:58:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:58:28 | D | - sum error = [186706.7800, 180753.5402, 174839.5957, 169700.1485, 164587.8246] +25-08-28 12:58:28 | D | - best error = [186706.7800, 180753.5402, 174839.5957, 169700.1485, 164587.8246] +25-08-28 12:58:28 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:58:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:58:28 | D | - sum error = [160922.8044, 158584.6455, 155881.0426, 154314.2593, 152668.2003] +25-08-28 12:58:28 | D | - best error = [160922.8044, 158584.6455, 155881.0426, 154314.2593, 152668.2003] +25-08-28 12:58:28 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:58:28 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:58:28 | D | - sum error = [151373.8751, 151372.0012, 151195.6960, 151898.3542, 152241.5731] +25-08-28 12:58:28 | D | - best error = [151373.8751, 151372.0012, 151195.6960, 151195.6960, 151195.6960] +25-08-28 12:58:28 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:58:28 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:58:28 | D | - sum error = [375654.7699, 348301.6183, 321655.3664, 298657.5069, 278100.9226] +25-08-28 12:58:28 | D | - best error = [151195.6960, 151195.6960, 151195.6960, 151195.6960, 151195.6960] +25-08-28 12:58:28 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:58:28 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:58:28 | D | - sum error = [259027.3558, 240743.3055, 223831.8821, 209329.2272, 196753.8626] +25-08-28 12:58:28 | D | - best error = [151195.6960, 151195.6960, 151195.6960, 151195.6960, 151195.6960] +25-08-28 12:58:28 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:58:28 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:58:28 | D | - sum error = [187529.1083, 177648.4143, 169222.2087, 162886.6513, 158640.7092] +25-08-28 12:58:28 | D | - best error = [151195.6960, 151195.6960, 151195.6960, 151195.6960, 151195.6960] +25-08-28 12:58:28 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:58:28 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:58:28 | D | - sum error = [156195.9303, 153139.7438, 152424.0922, 152171.5714] +25-08-28 12:58:28 | D | - best error = [151195.6960, 151195.6960, 151195.6960, 151195.6960] +25-08-28 12:58:28 | D | + error = 151195.6960 +25-08-28 12:58:28 | D | + scale = [min=0.5878, max=14.6355] +25-08-28 12:58:28 | D | - single_transformer_blocks.32.attn.out_proj +25-08-28 12:58:28 | D | + w: sint4 +25-08-28 12:58:28 | D | + x: sint4 +25-08-28 12:58:28 | D | + y: None +25-08-28 12:58:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:58:28 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 12:58:28 | D | + x - AbsMax +25-08-28 12:58:28 | D | + x = [min=0.8281, max=11.5625] +25-08-28 12:58:28 | D | + w - AbsMax +25-08-28 12:58:28 | D | + w = [min=0.1055, max=0.4199] +25-08-28 12:58:28 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 12:58:29 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 12:59:27 | D | - x / w range = AbsMax / AbsMax +25-08-28 12:59:27 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 12:59:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:59:27 | D | - sum error = [10039.4562, 9934.9879, 9873.0392, 9790.3258, 9689.4475] +25-08-28 12:59:27 | D | - best error = [10039.4562, 9934.9879, 9873.0392, 9790.3258, 9689.4475] +25-08-28 12:59:27 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 12:59:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:59:27 | D | - sum error = [ 9607.1519, 9517.9998, 9450.5229, 9400.2766, 9344.1939] +25-08-28 12:59:27 | D | - best error = [ 9607.1519, 9517.9998, 9450.5229, 9400.2766, 9344.1939] +25-08-28 12:59:27 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 12:59:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:59:27 | D | - sum error = [ 9312.3935, 9302.4474, 9264.5363, 9248.1639, 9262.2347] +25-08-28 12:59:27 | D | - best error = [ 9312.3935, 9302.4474, 9264.5363, 9248.1639, 9248.1639] +25-08-28 12:59:27 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:59:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 12:59:27 | D | - sum error = [ 9282.9159, 9275.2686, 9312.8797, 9347.8656, 9425.4320] +25-08-28 12:59:27 | D | - best error = [ 9248.1639, 9248.1639, 9248.1639, 9248.1639, 9248.1639] +25-08-28 12:59:27 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 12:59:27 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 12:59:27 | D | - sum error = [10454.8615, 10277.6149, 10130.7173, 9969.2202, 9862.3101] +25-08-28 12:59:27 | D | - best error = [ 9248.1639, 9248.1639, 9248.1639, 9248.1639, 9248.1639] +25-08-28 12:59:27 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 12:59:27 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 12:59:27 | D | - sum error = [ 9731.2096, 9604.1924, 9540.7614, 9467.3865, 9406.7414] +25-08-28 12:59:27 | D | - best error = [ 9248.1639, 9248.1639, 9248.1639, 9248.1639, 9248.1639] +25-08-28 12:59:27 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 12:59:27 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 12:59:27 | D | - sum error = [ 9377.2845, 9308.1813, 9261.7441, 9264.9127, 9274.1689] +25-08-28 12:59:27 | D | - best error = [ 9248.1639, 9248.1639, 9248.1639, 9248.1639, 9248.1639] +25-08-28 12:59:27 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 12:59:27 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 12:59:27 | D | - sum error = [ 9267.3297, 9278.7986, 9341.1605, 9433.3163] +25-08-28 12:59:27 | D | - best error = [ 9248.1639, 9248.1639, 9248.1639, 9248.1639] +25-08-28 12:59:27 | D | + error = 9248.1639 +25-08-28 12:59:27 | D | + scale = [min=0.8846, max=4.9089] +25-08-28 12:59:27 | D | - single_transformer_blocks.32.down_proj +25-08-28 12:59:27 | D | + w: sint4 +25-08-28 12:59:27 | D | + x: uint4 +25-08-28 12:59:27 | D | + y: None +25-08-28 12:59:27 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 12:59:27 | D | + finished parsing calibration arguments, ram usage: 16.1 +25-08-28 12:59:29 | D | + x - AbsMax +25-08-28 12:59:29 | D | + x = [min=0.4570, max=19.1250] +25-08-28 12:59:29 | D | + w - AbsMax +25-08-28 12:59:29 | D | + w = [min=0.1069, max=1.1719] +25-08-28 12:59:29 | D | + finished resetting calibrator, ram usage: 16.4 +25-08-28 12:59:31 | D | + finished calculating the original outputs, ram usage: 16.8 +25-08-28 13:01:21 | D | - x / w range = AbsMax / AbsMax +25-08-28 13:01:21 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 13:01:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:01:21 | D | - sum error = [ 8930.9717, 8898.7064, 8907.3354, 8927.3418, 8907.8213] +25-08-28 13:01:21 | D | - best error = [ 8930.9717, 8898.7064, 8898.7064, 8898.7064, 8898.7064] +25-08-28 13:01:21 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 13:01:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:01:21 | D | - sum error = [ 8867.7230, 8870.9364, 8914.5907, 8982.7782, 9037.2633] +25-08-28 13:01:21 | D | - best error = [ 8867.7230, 8867.7230, 8867.7230, 8867.7230, 8867.7230] +25-08-28 13:01:21 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 13:01:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:01:21 | D | - sum error = [ 9135.5613, 9204.4885, 9328.3658, 9404.3658, 9538.7710] +25-08-28 13:01:21 | D | - best error = [ 8867.7230, 8867.7230, 8867.7230, 8867.7230, 8867.7230] +25-08-28 13:01:21 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:01:21 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:01:21 | D | - sum error = [ 9686.7675, 9846.5287, 10054.3976, 10230.4389, 10390.1230] +25-08-28 13:01:21 | D | - best error = [ 8867.7230, 8867.7230, 8867.7230, 8867.7230, 8867.7230] +25-08-28 13:01:21 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 13:01:21 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 13:01:21 | D | - sum error = [ 9588.9428, 9349.3953, 9155.1682, 9015.1650, 8900.7856] +25-08-28 13:01:21 | D | - best error = [ 8867.7230, 8867.7230, 8867.7230, 8867.7230, 8867.7230] +25-08-28 13:01:21 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 13:01:21 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 13:01:21 | D | - sum error = [ 8819.7927, 8766.2772, 8764.8571, 8731.8181, 8782.3177] +25-08-28 13:01:21 | D | - best error = [ 8819.7927, 8766.2772, 8764.8571, 8731.8181, 8731.8181] +25-08-28 13:01:21 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 13:01:21 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 13:01:21 | D | - sum error = [ 8850.1381, 8926.0607, 9033.4738, 9199.8257, 9378.5671] +25-08-28 13:01:21 | D | - best error = [ 8731.8181, 8731.8181, 8731.8181, 8731.8181, 8731.8181] +25-08-28 13:01:21 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:01:21 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 13:01:21 | D | - sum error = [ 9569.5546, 9765.1142, 10085.1281, 10332.6283] +25-08-28 13:01:21 | D | - best error = [ 8731.8181, 8731.8181, 8731.8181, 8731.8181] +25-08-28 13:01:21 | D | + error = 8731.8181 +25-08-28 13:01:21 | D | + scale = [min=0.8611, max=8.3899] +25-08-28 13:01:38 | D | - Smoothing Diffusion Block single_transformer_blocks.33 +25-08-28 13:01:38 | D | - Skipping Module single_transformer_blocks.33.norm.linear +25-08-28 13:01:38 | D | - Smoothing Transformer Block single_transformer_blocks.33 +25-08-28 13:01:38 | D | - single_transformer_blocks.33.attn.qkv_proj + single_transformer_blocks.33.up_proj +25-08-28 13:01:38 | D | + w: sint4 +25-08-28 13:01:38 | D | + x: sint4 +25-08-28 13:01:38 | D | + y: None +25-08-28 13:01:38 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:01:38 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 13:01:39 | D | + x - AbsMax +25-08-28 13:01:39 | D | + x = [min=0.2617, max=23.1250] +25-08-28 13:01:39 | D | + w - AbsMax +25-08-28 13:01:39 | D | + w = [min=0.1426, max=1.3828] +25-08-28 13:01:39 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 13:01:40 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 13:03:46 | D | - x / w range = AbsMax / AbsMax +25-08-28 13:03:46 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 13:03:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:03:46 | D | - sum error = [212253.7012, 204168.5265, 198000.9716, 192467.0479, 186658.4769] +25-08-28 13:03:46 | D | - best error = [212253.7012, 204168.5265, 198000.9716, 192467.0479, 186658.4769] +25-08-28 13:03:46 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 13:03:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:03:46 | D | - sum error = [181349.2598, 175562.5866, 171561.9471, 168254.8802, 165614.8183] +25-08-28 13:03:46 | D | - best error = [181349.2598, 175562.5866, 171561.9471, 168254.8802, 165614.8183] +25-08-28 13:03:46 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 13:03:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:03:46 | D | - sum error = [162305.7287, 161045.0543, 159519.3549, 158183.4285, 157497.4269] +25-08-28 13:03:46 | D | - best error = [162305.7287, 161045.0543, 159519.3549, 158183.4285, 157497.4269] +25-08-28 13:03:46 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:03:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:03:46 | D | - sum error = [157083.8052, 156938.9229, 157468.6868, 158119.9108, 159611.1050] +25-08-28 13:03:46 | D | - best error = [157083.8052, 156938.9229, 156938.9229, 156938.9229, 156938.9229] +25-08-28 13:03:46 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 13:03:46 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 13:03:46 | D | - sum error = [362145.8648, 338151.0967, 315635.3347, 293106.5988, 273931.1549] +25-08-28 13:03:46 | D | - best error = [156938.9229, 156938.9229, 156938.9229, 156938.9229, 156938.9229] +25-08-28 13:03:46 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 13:03:46 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 13:03:46 | D | - sum error = [256533.0770, 240541.2904, 225145.8419, 213752.8909, 201384.5909] +25-08-28 13:03:46 | D | - best error = [156938.9229, 156938.9229, 156938.9229, 156938.9229, 156938.9229] +25-08-28 13:03:46 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 13:03:46 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 13:03:46 | D | - sum error = [192031.8587, 183074.9617, 175739.5367, 169234.8189, 165491.0560] +25-08-28 13:03:46 | D | - best error = [156938.9229, 156938.9229, 156938.9229, 156938.9229, 156938.9229] +25-08-28 13:03:46 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:03:46 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 13:03:46 | D | - sum error = [162576.9692, 160421.8184, 159927.9006, 159037.0833] +25-08-28 13:03:46 | D | - best error = [156938.9229, 156938.9229, 156938.9229, 156938.9229] +25-08-28 13:03:46 | D | + error = 156938.9229 +25-08-28 13:03:46 | D | + scale = [min=0.3422, max=12.3386] +25-08-28 13:03:46 | D | - single_transformer_blocks.33.attn.out_proj +25-08-28 13:03:46 | D | + w: sint4 +25-08-28 13:03:46 | D | + x: sint4 +25-08-28 13:03:46 | D | + y: None +25-08-28 13:03:46 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:03:46 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 13:03:47 | D | + x - AbsMax +25-08-28 13:03:47 | D | + x = [min=0.7188, max=8.6875] +25-08-28 13:03:47 | D | + w - AbsMax +25-08-28 13:03:47 | D | + w = [min=0.1035, max=0.3379] +25-08-28 13:03:47 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 13:03:48 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 13:04:48 | D | - x / w range = AbsMax / AbsMax +25-08-28 13:04:48 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 13:04:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:04:48 | D | - sum error = [ 7982.5545, 7935.9143, 7912.8708, 7841.7773, 7795.1677] +25-08-28 13:04:48 | D | - best error = [ 7982.5545, 7935.9143, 7912.8708, 7841.7773, 7795.1677] +25-08-28 13:04:48 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 13:04:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:04:48 | D | - sum error = [ 7800.2415, 7788.2625, 7761.4597, 7745.2393, 7764.3514] +25-08-28 13:04:48 | D | - best error = [ 7795.1677, 7788.2625, 7761.4597, 7745.2393, 7745.2393] +25-08-28 13:04:48 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 13:04:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:04:48 | D | - sum error = [ 7725.4975, 7757.5571, 7805.3920, 7806.3810, 7828.0284] +25-08-28 13:04:48 | D | - best error = [ 7725.4975, 7725.4975, 7725.4975, 7725.4975, 7725.4975] +25-08-28 13:04:48 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:04:48 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:04:48 | D | - sum error = [ 7860.9211, 7908.8429, 7992.9126, 8032.6658, 8117.4035] +25-08-28 13:04:48 | D | - best error = [ 7725.4975, 7725.4975, 7725.4975, 7725.4975, 7725.4975] +25-08-28 13:04:48 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 13:04:48 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 13:04:48 | D | - sum error = [ 8181.7785, 8111.0165, 8045.1618, 7959.9646, 7907.8776] +25-08-28 13:04:48 | D | - best error = [ 7725.4975, 7725.4975, 7725.4975, 7725.4975, 7725.4975] +25-08-28 13:04:48 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 13:04:48 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 13:04:48 | D | - sum error = [ 7833.6247, 7792.0865, 7769.0880, 7758.9979, 7741.5138] +25-08-28 13:04:48 | D | - best error = [ 7725.4975, 7725.4975, 7725.4975, 7725.4975, 7725.4975] +25-08-28 13:04:48 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 13:04:48 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 13:04:48 | D | - sum error = [ 7722.4659, 7746.1753, 7742.3784, 7775.5589, 7809.9849] +25-08-28 13:04:48 | D | - best error = [ 7722.4659, 7722.4659, 7722.4659, 7722.4659, 7722.4659] +25-08-28 13:04:48 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:04:48 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 13:04:48 | D | - sum error = [ 7840.3819, 7902.5234, 7980.5539, 8085.9711] +25-08-28 13:04:48 | D | - best error = [ 7722.4659, 7722.4659, 7722.4659, 7722.4659] +25-08-28 13:04:48 | D | + error = 7722.4659 +25-08-28 13:04:48 | D | + scale = [min=1.5043, max=6.9626] +25-08-28 13:04:48 | D | - single_transformer_blocks.33.down_proj +25-08-28 13:04:48 | D | + w: sint4 +25-08-28 13:04:48 | D | + x: uint4 +25-08-28 13:04:48 | D | + y: None +25-08-28 13:04:48 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:04:48 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 13:04:50 | D | + x - AbsMax +25-08-28 13:04:50 | D | + x = [min=0.4570, max=11.9375] +25-08-28 13:04:50 | D | + w - AbsMax +25-08-28 13:04:50 | D | + w = [min=0.1206, max=1.5000] +25-08-28 13:04:50 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 13:04:52 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 13:06:37 | D | - x / w range = AbsMax / AbsMax +25-08-28 13:06:37 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 13:06:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:06:37 | D | - sum error = [ 9350.3733, 9268.7669, 9233.3695, 9219.3284, 9158.2439] +25-08-28 13:06:37 | D | - best error = [ 9350.3733, 9268.7669, 9233.3695, 9219.3284, 9158.2439] +25-08-28 13:06:37 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 13:06:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:06:37 | D | - sum error = [ 9150.9082, 9247.7370, 9259.1097, 9273.5271, 9370.8292] +25-08-28 13:06:37 | D | - best error = [ 9150.9082, 9150.9082, 9150.9082, 9150.9082, 9150.9082] +25-08-28 13:06:37 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 13:06:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:06:37 | D | - sum error = [ 9432.4321, 9469.7709, 9583.6189, 9644.9970, 9750.5503] +25-08-28 13:06:37 | D | - best error = [ 9150.9082, 9150.9082, 9150.9082, 9150.9082, 9150.9082] +25-08-28 13:06:37 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:06:37 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:06:37 | D | - sum error = [ 9855.7342, 9986.1756, 10098.1551, 10244.5002, 10510.2989] +25-08-28 13:06:37 | D | - best error = [ 9150.9082, 9150.9082, 9150.9082, 9150.9082, 9150.9082] +25-08-28 13:06:37 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 13:06:37 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 13:06:37 | D | - sum error = [11510.1555, 11138.2728, 10711.7087, 10415.9666, 10120.9241] +25-08-28 13:06:37 | D | - best error = [ 9150.9082, 9150.9082, 9150.9082, 9150.9082, 9150.9082] +25-08-28 13:06:37 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 13:06:37 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 13:06:37 | D | - sum error = [ 9897.8943, 9731.5668, 9656.1317, 9512.9142, 9486.5825] +25-08-28 13:06:37 | D | - best error = [ 9150.9082, 9150.9082, 9150.9082, 9150.9082, 9150.9082] +25-08-28 13:06:37 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 13:06:37 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 13:06:37 | D | - sum error = [ 9444.1465, 9441.0101, 9495.7999, 9611.8785, 9684.1351] +25-08-28 13:06:37 | D | - best error = [ 9150.9082, 9150.9082, 9150.9082, 9150.9082, 9150.9082] +25-08-28 13:06:37 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:06:37 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 13:06:37 | D | - sum error = [ 9808.7222, 9965.8878, 10124.7092, 10371.9267] +25-08-28 13:06:37 | D | - best error = [ 9150.9082, 9150.9082, 9150.9082, 9150.9082] +25-08-28 13:06:37 | D | + error = 9150.9082 +25-08-28 13:06:37 | D | + scale = [min=0.8222, max=1.8588] +25-08-28 13:06:55 | D | - Smoothing Diffusion Block single_transformer_blocks.34 +25-08-28 13:06:55 | D | - Skipping Module single_transformer_blocks.34.norm.linear +25-08-28 13:06:55 | D | - Smoothing Transformer Block single_transformer_blocks.34 +25-08-28 13:06:55 | D | - single_transformer_blocks.34.attn.qkv_proj + single_transformer_blocks.34.up_proj +25-08-28 13:06:55 | D | + w: sint4 +25-08-28 13:06:55 | D | + x: sint4 +25-08-28 13:06:55 | D | + y: None +25-08-28 13:06:55 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:06:55 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 13:06:55 | D | + x - AbsMax +25-08-28 13:06:55 | D | + x = [min=0.6445, max=23.3750] +25-08-28 13:06:55 | D | + w - AbsMax +25-08-28 13:06:55 | D | + w = [min=0.1523, max=2.0156] +25-08-28 13:06:55 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 13:06:56 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 13:09:02 | D | - x / w range = AbsMax / AbsMax +25-08-28 13:09:02 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 13:09:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:09:02 | D | - sum error = [258091.0678, 249283.6970, 239998.3872, 232298.2601, 223964.0657] +25-08-28 13:09:02 | D | - best error = [258091.0678, 249283.6970, 239998.3872, 232298.2601, 223964.0657] +25-08-28 13:09:02 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 13:09:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:09:02 | D | - sum error = [217156.6024, 211309.5280, 205908.7985, 201588.3781, 197812.3840] +25-08-28 13:09:02 | D | - best error = [217156.6024, 211309.5280, 205908.7985, 201588.3781, 197812.3840] +25-08-28 13:09:02 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 13:09:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:09:02 | D | - sum error = [194130.5260, 192910.5555, 189335.4306, 188000.0082, 187605.3103] +25-08-28 13:09:02 | D | - best error = [194130.5260, 192910.5555, 189335.4306, 188000.0082, 187605.3103] +25-08-28 13:09:02 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:09:02 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:09:02 | D | - sum error = [187193.1841, 186868.6024, 187379.4531, 187475.8719, 190002.0958] +25-08-28 13:09:02 | D | - best error = [187193.1841, 186868.6024, 186868.6024, 186868.6024, 186868.6024] +25-08-28 13:09:02 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 13:09:02 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 13:09:02 | D | - sum error = [482115.8869, 451764.8073, 422172.7195, 393745.4977, 365311.8318] +25-08-28 13:09:02 | D | - best error = [186868.6024, 186868.6024, 186868.6024, 186868.6024, 186868.6024] +25-08-28 13:09:02 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 13:09:02 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 13:09:02 | D | - sum error = [340321.8400, 315126.2777, 293024.9208, 276756.8984, 259585.7988] +25-08-28 13:09:02 | D | - best error = [186868.6024, 186868.6024, 186868.6024, 186868.6024, 186868.6024] +25-08-28 13:09:02 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 13:09:02 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 13:09:02 | D | - sum error = [245109.9737, 231341.7996, 221110.6360, 211865.3375, 204932.4871] +25-08-28 13:09:02 | D | - best error = [186868.6024, 186868.6024, 186868.6024, 186868.6024, 186868.6024] +25-08-28 13:09:02 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:09:02 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 13:09:02 | D | - sum error = [198869.6171, 195206.9664, 192151.1085, 190773.4905] +25-08-28 13:09:02 | D | - best error = [186868.6024, 186868.6024, 186868.6024, 186868.6024] +25-08-28 13:09:02 | D | + error = 186868.6024 +25-08-28 13:09:02 | D | + scale = [min=0.7037, max=12.4452] +25-08-28 13:09:03 | D | - single_transformer_blocks.34.attn.out_proj +25-08-28 13:09:03 | D | + w: sint4 +25-08-28 13:09:03 | D | + x: sint4 +25-08-28 13:09:03 | D | + y: None +25-08-28 13:09:03 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:09:03 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 13:09:03 | D | + x - AbsMax +25-08-28 13:09:03 | D | + x = [min=0.5469, max=14.8125] +25-08-28 13:09:03 | D | + w - AbsMax +25-08-28 13:09:03 | D | + w = [min=0.0933, max=0.3691] +25-08-28 13:09:03 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 13:09:04 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 13:10:03 | D | - x / w range = AbsMax / AbsMax +25-08-28 13:10:03 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 13:10:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:10:03 | D | - sum error = [12732.8419, 12597.4385, 12499.2992, 12370.2611, 12257.1135] +25-08-28 13:10:03 | D | - best error = [12732.8419, 12597.4385, 12499.2992, 12370.2611, 12257.1135] +25-08-28 13:10:03 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 13:10:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:10:03 | D | - sum error = [12190.8346, 12093.4296, 12073.1722, 12001.9038, 11997.7825] +25-08-28 13:10:03 | D | - best error = [12190.8346, 12093.4296, 12073.1722, 12001.9038, 11997.7825] +25-08-28 13:10:03 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 13:10:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:10:03 | D | - sum error = [11923.8162, 11963.9044, 12062.6876, 12161.6473, 12243.5851] +25-08-28 13:10:03 | D | - best error = [11923.8162, 11923.8162, 11923.8162, 11923.8162, 11923.8162] +25-08-28 13:10:03 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:10:03 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:10:03 | D | - sum error = [12385.4715, 12520.5425, 12699.8929, 12965.7666, 13308.5446] +25-08-28 13:10:03 | D | - best error = [11923.8162, 11923.8162, 11923.8162, 11923.8162, 11923.8162] +25-08-28 13:10:03 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 13:10:03 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 13:10:03 | D | - sum error = [12711.5878, 12556.9742, 12389.9592, 12240.1601, 12114.6562] +25-08-28 13:10:03 | D | - best error = [11923.8162, 11923.8162, 11923.8162, 11923.8162, 11923.8162] +25-08-28 13:10:03 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 13:10:03 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 13:10:03 | D | - sum error = [12022.9234, 11918.5335, 11862.9345, 11763.0737, 11704.0239] +25-08-28 13:10:03 | D | - best error = [11923.8162, 11918.5335, 11862.9345, 11763.0737, 11704.0239] +25-08-28 13:10:03 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 13:10:03 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 13:10:03 | D | - sum error = [11690.7663, 11674.5967, 11746.9213, 11878.0362, 11989.7890] +25-08-28 13:10:03 | D | - best error = [11690.7663, 11674.5967, 11674.5967, 11674.5967, 11674.5967] +25-08-28 13:10:03 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:10:03 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 13:10:03 | D | - sum error = [12250.1579, 12446.5796, 12727.7498, 13175.8008] +25-08-28 13:10:03 | D | - best error = [11674.5967, 11674.5967, 11674.5967, 11674.5967] +25-08-28 13:10:03 | D | + error = 11674.5967 +25-08-28 13:10:03 | D | + scale = [min=1.5133, max=9.4261] +25-08-28 13:10:03 | D | - single_transformer_blocks.34.down_proj +25-08-28 13:10:03 | D | + w: sint4 +25-08-28 13:10:03 | D | + x: uint4 +25-08-28 13:10:03 | D | + y: None +25-08-28 13:10:03 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:10:03 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 13:10:05 | D | + x - AbsMax +25-08-28 13:10:05 | D | + x = [min=0.1719, max=19.5000] +25-08-28 13:10:05 | D | + w - AbsMax +25-08-28 13:10:05 | D | + w = [min=0.1050, max=3.1094] +25-08-28 13:10:05 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 13:10:07 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 13:11:56 | D | - x / w range = AbsMax / AbsMax +25-08-28 13:11:56 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 13:11:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:11:56 | D | - sum error = [12317.2581, 12178.7415, 11967.2219, 11769.9346, 11737.9760] +25-08-28 13:11:56 | D | - best error = [12317.2581, 12178.7415, 11967.2219, 11769.9346, 11737.9760] +25-08-28 13:11:56 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 13:11:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:11:56 | D | - sum error = [11646.1887, 11694.2858, 11669.6680, 11671.9082, 11698.6194] +25-08-28 13:11:56 | D | - best error = [11646.1887, 11646.1887, 11646.1887, 11646.1887, 11646.1887] +25-08-28 13:11:56 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 13:11:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:11:56 | D | - sum error = [11800.5519, 11825.9173, 11906.8209, 11990.7411, 12234.2222] +25-08-28 13:11:56 | D | - best error = [11646.1887, 11646.1887, 11646.1887, 11646.1887, 11646.1887] +25-08-28 13:11:56 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:11:56 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:11:56 | D | - sum error = [12446.0249, 12691.0539, 12970.7702, 13327.9850, 13656.4452] +25-08-28 13:11:56 | D | - best error = [11646.1887, 11646.1887, 11646.1887, 11646.1887, 11646.1887] +25-08-28 13:11:56 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 13:11:56 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 13:11:56 | D | - sum error = [13529.0147, 13078.4323, 12533.1962, 12048.8931, 11664.8539] +25-08-28 13:11:56 | D | - best error = [11646.1887, 11646.1887, 11646.1887, 11646.1887, 11646.1887] +25-08-28 13:11:56 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 13:11:56 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 13:11:56 | D | - sum error = [11513.7862, 11361.1644, 11227.7007, 11149.1244, 11202.1891] +25-08-28 13:11:56 | D | - best error = [11513.7862, 11361.1644, 11227.7007, 11149.1244, 11149.1244] +25-08-28 13:11:56 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 13:11:56 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 13:11:56 | D | - sum error = [11238.7049, 11296.5188, 11445.6661, 11675.8486, 11905.8822] +25-08-28 13:11:56 | D | - best error = [11149.1244, 11149.1244, 11149.1244, 11149.1244, 11149.1244] +25-08-28 13:11:56 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:11:56 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 13:11:56 | D | - sum error = [12194.0005, 12667.7935, 13037.4991, 13529.9650] +25-08-28 13:11:56 | D | - best error = [11149.1244, 11149.1244, 11149.1244, 11149.1244] +25-08-28 13:11:56 | D | + error = 11149.1244 +25-08-28 13:11:56 | D | + scale = [min=0.4940, max=7.6861] +25-08-28 13:12:15 | D | - Smoothing Diffusion Block single_transformer_blocks.35 +25-08-28 13:12:15 | D | - Skipping Module single_transformer_blocks.35.norm.linear +25-08-28 13:12:15 | D | - Smoothing Transformer Block single_transformer_blocks.35 +25-08-28 13:12:15 | D | - single_transformer_blocks.35.attn.qkv_proj + single_transformer_blocks.35.up_proj +25-08-28 13:12:15 | D | + w: sint4 +25-08-28 13:12:15 | D | + x: sint4 +25-08-28 13:12:15 | D | + y: None +25-08-28 13:12:15 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:12:15 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 13:12:15 | D | + x - AbsMax +25-08-28 13:12:15 | D | + x = [min=0.4414, max=25.3750] +25-08-28 13:12:15 | D | + w - AbsMax +25-08-28 13:12:15 | D | + w = [min=0.1514, max=0.8828] +25-08-28 13:12:15 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 13:12:16 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 13:14:22 | D | - x / w range = AbsMax / AbsMax +25-08-28 13:14:22 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 13:14:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:14:22 | D | - sum error = [331832.5631, 315810.5923, 302259.4450, 288467.8156, 275664.9616] +25-08-28 13:14:22 | D | - best error = [331832.5631, 315810.5923, 302259.4450, 288467.8156, 275664.9616] +25-08-28 13:14:22 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 13:14:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:14:22 | D | - sum error = [264026.1495, 254488.1759, 246313.6514, 237072.4919, 230509.4318] +25-08-28 13:14:22 | D | - best error = [264026.1495, 254488.1759, 246313.6514, 237072.4919, 230509.4318] +25-08-28 13:14:22 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 13:14:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:14:22 | D | - sum error = [223658.0979, 220061.9316, 215012.1696, 212345.1042, 209358.7913] +25-08-28 13:14:22 | D | - best error = [223658.0979, 220061.9316, 215012.1696, 212345.1042, 209358.7913] +25-08-28 13:14:22 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:14:22 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:14:22 | D | - sum error = [208229.3382, 206500.1531, 207980.2575, 209245.1396, 210017.7724] +25-08-28 13:14:22 | D | - best error = [208229.3382, 206500.1531, 206500.1531, 206500.1531, 206500.1531] +25-08-28 13:14:22 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 13:14:22 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 13:14:22 | D | - sum error = [542851.1238, 502992.7387, 467896.8699, 436768.2802, 403433.5425] +25-08-28 13:14:22 | D | - best error = [206500.1531, 206500.1531, 206500.1531, 206500.1531, 206500.1531] +25-08-28 13:14:22 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 13:14:22 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 13:14:22 | D | - sum error = [372463.2629, 344992.7702, 319335.9253, 297370.1444, 279567.5893] +25-08-28 13:14:22 | D | - best error = [206500.1531, 206500.1531, 206500.1531, 206500.1531, 206500.1531] +25-08-28 13:14:22 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 13:14:22 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 13:14:22 | D | - sum error = [264159.2228, 247833.7342, 236564.8562, 226124.4446, 219730.8375] +25-08-28 13:14:22 | D | - best error = [206500.1531, 206500.1531, 206500.1531, 206500.1531, 206500.1531] +25-08-28 13:14:22 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:14:22 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 13:14:22 | D | - sum error = [213764.9724, 210924.2806, 209271.9558, 210354.6525] +25-08-28 13:14:22 | D | - best error = [206500.1531, 206500.1531, 206500.1531, 206500.1531] +25-08-28 13:14:22 | D | + error = 206500.1531 +25-08-28 13:14:22 | D | + scale = [min=0.5198, max=13.2900] +25-08-28 13:14:22 | D | - single_transformer_blocks.35.attn.out_proj +25-08-28 13:14:22 | D | + w: sint4 +25-08-28 13:14:22 | D | + x: sint4 +25-08-28 13:14:22 | D | + y: None +25-08-28 13:14:22 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:14:22 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 13:14:23 | D | + x - AbsMax +25-08-28 13:14:23 | D | + x = [min=0.7617, max=13.3750] +25-08-28 13:14:23 | D | + w - AbsMax +25-08-28 13:14:23 | D | + w = [min=0.0972, max=0.5352] +25-08-28 13:14:23 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 13:14:24 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 13:15:25 | D | - x / w range = AbsMax / AbsMax +25-08-28 13:15:25 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 13:15:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:15:25 | D | - sum error = [12264.2348, 12155.3228, 12034.0198, 11936.5154, 11841.8512] +25-08-28 13:15:25 | D | - best error = [12264.2348, 12155.3228, 12034.0198, 11936.5154, 11841.8512] +25-08-28 13:15:25 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 13:15:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:15:25 | D | - sum error = [11764.0490, 11687.3196, 11643.0594, 11544.6715, 11530.9087] +25-08-28 13:15:25 | D | - best error = [11764.0490, 11687.3196, 11643.0594, 11544.6715, 11530.9087] +25-08-28 13:15:25 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 13:15:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:15:25 | D | - sum error = [11536.5246, 11480.3031, 11482.3357, 11478.9310, 11522.8396] +25-08-28 13:15:25 | D | - best error = [11530.9087, 11480.3031, 11480.3031, 11478.9310, 11478.9310] +25-08-28 13:15:25 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:15:25 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:15:25 | D | - sum error = [11589.8024, 11620.3367, 11707.9426, 11783.8344, 11888.7544] +25-08-28 13:15:25 | D | - best error = [11478.9310, 11478.9310, 11478.9310, 11478.9310, 11478.9310] +25-08-28 13:15:25 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 13:15:25 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 13:15:25 | D | - sum error = [12261.6938, 12122.9752, 11936.4229, 11820.6145, 11712.7834] +25-08-28 13:15:25 | D | - best error = [11478.9310, 11478.9310, 11478.9310, 11478.9310, 11478.9310] +25-08-28 13:15:25 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 13:15:25 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 13:15:25 | D | - sum error = [11560.9868, 11473.9360, 11412.2064, 11349.1811, 11290.1270] +25-08-28 13:15:25 | D | - best error = [11478.9310, 11473.9360, 11412.2064, 11349.1811, 11290.1270] +25-08-28 13:15:25 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 13:15:25 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 13:15:25 | D | - sum error = [11271.5742, 11299.8637, 11323.6302, 11306.4902, 11391.8606] +25-08-28 13:15:25 | D | - best error = [11271.5742, 11271.5742, 11271.5742, 11271.5742, 11271.5742] +25-08-28 13:15:25 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:15:25 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 13:15:25 | D | - sum error = [11518.1328, 11600.5524, 11720.9624, 11848.6934] +25-08-28 13:15:25 | D | - best error = [11271.5742, 11271.5742, 11271.5742, 11271.5742] +25-08-28 13:15:25 | D | + error = 11271.5742 +25-08-28 13:15:25 | D | + scale = [min=1.4698, max=8.7687] +25-08-28 13:15:26 | D | - single_transformer_blocks.35.down_proj +25-08-28 13:15:26 | D | + w: sint4 +25-08-28 13:15:26 | D | + x: uint4 +25-08-28 13:15:26 | D | + y: None +25-08-28 13:15:26 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:15:26 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 13:15:27 | D | + x - AbsMax +25-08-28 13:15:27 | D | + x = [min=0.3906, max=18.8750] +25-08-28 13:15:27 | D | + w - AbsMax +25-08-28 13:15:27 | D | + w = [min=0.1191, max=3.0625] +25-08-28 13:15:27 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 13:15:29 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 13:17:15 | D | - x / w range = AbsMax / AbsMax +25-08-28 13:17:15 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 13:17:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:17:15 | D | - sum error = [13619.1154, 13310.7091, 13075.4481, 13022.7125, 12829.6500] +25-08-28 13:17:15 | D | - best error = [13619.1154, 13310.7091, 13075.4481, 13022.7125, 12829.6500] +25-08-28 13:17:15 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 13:17:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:17:15 | D | - sum error = [12697.7919, 12564.3661, 12551.8783, 12572.6000, 12621.5219] +25-08-28 13:17:15 | D | - best error = [12697.7919, 12564.3661, 12551.8783, 12551.8783, 12551.8783] +25-08-28 13:17:15 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 13:17:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:17:15 | D | - sum error = [12768.3484, 12852.0280, 12946.0604, 13113.6954, 13230.1528] +25-08-28 13:17:15 | D | - best error = [12551.8783, 12551.8783, 12551.8783, 12551.8783, 12551.8783] +25-08-28 13:17:15 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:17:15 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:17:15 | D | - sum error = [13468.8922, 13729.4481, 13962.0675, 14425.4442, 14799.9714] +25-08-28 13:17:15 | D | - best error = [12551.8783, 12551.8783, 12551.8783, 12551.8783, 12551.8783] +25-08-28 13:17:15 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 13:17:15 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 13:17:15 | D | - sum error = [13861.7552, 13536.0437, 13147.0076, 12776.9674, 12557.1009] +25-08-28 13:17:15 | D | - best error = [12551.8783, 12551.8783, 12551.8783, 12551.8783, 12551.8783] +25-08-28 13:17:15 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 13:17:15 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 13:17:15 | D | - sum error = [12462.7105, 12317.9635, 12230.2242, 12204.5244, 12187.5254] +25-08-28 13:17:15 | D | - best error = [12462.7105, 12317.9635, 12230.2242, 12204.5244, 12187.5254] +25-08-28 13:17:15 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 13:17:15 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 13:17:15 | D | - sum error = [12252.0209, 12302.8543, 12442.5110, 12804.7048, 13022.4735] +25-08-28 13:17:15 | D | - best error = [12187.5254, 12187.5254, 12187.5254, 12187.5254, 12187.5254] +25-08-28 13:17:15 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:17:15 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 13:17:15 | D | - sum error = [13374.9425, 13688.0777, 14124.4378, 14503.3144] +25-08-28 13:17:15 | D | - best error = [12187.5254, 12187.5254, 12187.5254, 12187.5254] +25-08-28 13:17:15 | D | + error = 12187.5254 +25-08-28 13:17:15 | D | + scale = [min=0.5906, max=8.7674] +25-08-28 13:17:33 | D | - Smoothing Diffusion Block single_transformer_blocks.36 +25-08-28 13:17:33 | D | - Skipping Module single_transformer_blocks.36.norm.linear +25-08-28 13:17:33 | D | - Smoothing Transformer Block single_transformer_blocks.36 +25-08-28 13:17:33 | D | - single_transformer_blocks.36.attn.qkv_proj + single_transformer_blocks.36.up_proj +25-08-28 13:17:33 | D | + w: sint4 +25-08-28 13:17:33 | D | + x: sint4 +25-08-28 13:17:33 | D | + y: None +25-08-28 13:17:33 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:17:33 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 13:17:34 | D | + x - AbsMax +25-08-28 13:17:34 | D | + x = [min=0.4531, max=27.8750] +25-08-28 13:17:34 | D | + w - AbsMax +25-08-28 13:17:34 | D | + w = [min=0.1338, max=0.7969] +25-08-28 13:17:34 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 13:17:35 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 13:19:41 | D | - x / w range = AbsMax / AbsMax +25-08-28 13:19:41 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 13:19:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:19:41 | D | - sum error = [168253.1639, 158982.1448, 149478.2718, 139933.8230, 131999.0419] +25-08-28 13:19:41 | D | - best error = [168253.1639, 158982.1448, 149478.2718, 139933.8230, 131999.0419] +25-08-28 13:19:41 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 13:19:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:19:41 | D | - sum error = [124790.5802, 117299.5651, 111929.5916, 106134.9987, 102730.2774] +25-08-28 13:19:41 | D | - best error = [124790.5802, 117299.5651, 111929.5916, 106134.9987, 102730.2774] +25-08-28 13:19:41 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 13:19:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:19:41 | D | - sum error = [98655.8287, 96148.4612, 92666.0772, 90657.1211, 89067.0263] +25-08-28 13:19:41 | D | - best error = [98655.8287, 96148.4612, 92666.0772, 90657.1211, 89067.0263] +25-08-28 13:19:41 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:19:41 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:19:41 | D | - sum error = [88832.9539, 88463.6166, 88075.3103, 88802.8886, 89111.1723] +25-08-28 13:19:41 | D | - best error = [88832.9539, 88463.6166, 88075.3103, 88075.3103, 88075.3103] +25-08-28 13:19:41 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 13:19:41 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 13:19:41 | D | - sum error = [282547.6709, 256544.8682, 234150.2692, 212218.8467, 196106.5480] +25-08-28 13:19:41 | D | - best error = [88075.3103, 88075.3103, 88075.3103, 88075.3103, 88075.3103] +25-08-28 13:19:41 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 13:19:41 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 13:19:41 | D | - sum error = [179594.3547, 165351.2737, 149329.5626, 136991.1292, 126352.1570] +25-08-28 13:19:41 | D | - best error = [88075.3103, 88075.3103, 88075.3103, 88075.3103, 88075.3103] +25-08-28 13:19:41 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 13:19:41 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 13:19:41 | D | - sum error = [117544.3634, 108978.3761, 102616.9153, 97931.6072, 94235.3547] +25-08-28 13:19:41 | D | - best error = [88075.3103, 88075.3103, 88075.3103, 88075.3103, 88075.3103] +25-08-28 13:19:41 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:19:41 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 13:19:41 | D | - sum error = [91325.3931, 89788.2879, 89734.5247, 89717.5343] +25-08-28 13:19:41 | D | - best error = [88075.3103, 88075.3103, 88075.3103, 88075.3103] +25-08-28 13:19:41 | D | + error = 88075.3103 +25-08-28 13:19:41 | D | + scale = [min=0.5103, max=16.9213] +25-08-28 13:19:42 | D | - single_transformer_blocks.36.attn.out_proj +25-08-28 13:19:42 | D | + w: sint4 +25-08-28 13:19:42 | D | + x: sint4 +25-08-28 13:19:42 | D | + y: None +25-08-28 13:19:42 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:19:42 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 13:19:42 | D | + x - AbsMax +25-08-28 13:19:42 | D | + x = [min=0.7930, max=16.7500] +25-08-28 13:19:42 | D | + w - AbsMax +25-08-28 13:19:42 | D | + w = [min=0.1060, max=0.6562] +25-08-28 13:19:42 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 13:19:43 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 13:20:43 | D | - x / w range = AbsMax / AbsMax +25-08-28 13:20:43 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 13:20:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:20:43 | D | - sum error = [11335.9090, 11244.9028, 11128.6154, 11002.5175, 10923.6228] +25-08-28 13:20:43 | D | - best error = [11335.9090, 11244.9028, 11128.6154, 11002.5175, 10923.6228] +25-08-28 13:20:43 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 13:20:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:20:43 | D | - sum error = [10806.8741, 10703.7212, 10653.6026, 10633.9419, 10604.9151] +25-08-28 13:20:43 | D | - best error = [10806.8741, 10703.7212, 10653.6026, 10633.9419, 10604.9151] +25-08-28 13:20:43 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 13:20:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:20:43 | D | - sum error = [10599.4011, 10607.9751, 10598.1410, 10647.6292, 10699.2007] +25-08-28 13:20:43 | D | - best error = [10599.4011, 10599.4011, 10598.1410, 10598.1410, 10598.1410] +25-08-28 13:20:43 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:20:43 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:20:43 | D | - sum error = [10802.0517, 10850.5557, 11027.1570, 11169.4788, 11298.7093] +25-08-28 13:20:43 | D | - best error = [10598.1410, 10598.1410, 10598.1410, 10598.1410, 10598.1410] +25-08-28 13:20:43 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 13:20:43 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 13:20:43 | D | - sum error = [13790.5005, 13343.3579, 12961.2469, 12588.5618, 12285.8670] +25-08-28 13:20:43 | D | - best error = [10598.1410, 10598.1410, 10598.1410, 10598.1410, 10598.1410] +25-08-28 13:20:43 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 13:20:43 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 13:20:43 | D | - sum error = [12031.4501, 11779.1287, 11532.4700, 11366.7142, 11222.4881] +25-08-28 13:20:43 | D | - best error = [10598.1410, 10598.1410, 10598.1410, 10598.1410, 10598.1410] +25-08-28 13:20:43 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 13:20:43 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 13:20:43 | D | - sum error = [11089.5681, 10993.1772, 10978.1733, 10948.5864, 10968.2141] +25-08-28 13:20:43 | D | - best error = [10598.1410, 10598.1410, 10598.1410, 10598.1410, 10598.1410] +25-08-28 13:20:43 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:20:43 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 13:20:43 | D | - sum error = [10974.4088, 11072.4321, 11151.5717, 11329.7770] +25-08-28 13:20:43 | D | - best error = [10598.1410, 10598.1410, 10598.1410, 10598.1410] +25-08-28 13:20:43 | D | + error = 10598.1410 +25-08-28 13:20:43 | D | + scale = [min=0.8701, max=5.4251] +25-08-28 13:20:44 | D | - single_transformer_blocks.36.down_proj +25-08-28 13:20:44 | D | + w: sint4 +25-08-28 13:20:44 | D | + x: uint4 +25-08-28 13:20:44 | D | + y: None +25-08-28 13:20:44 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:20:44 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 13:20:47 | D | + x - AbsMax +25-08-28 13:20:47 | D | + x = [min=0.8242, max=17.1250] +25-08-28 13:20:47 | D | + w - AbsMax +25-08-28 13:20:47 | D | + w = [min=0.1279, max=1.1016] +25-08-28 13:20:47 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 13:20:49 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 13:22:46 | D | - x / w range = AbsMax / AbsMax +25-08-28 13:22:46 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 13:22:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:22:46 | D | - sum error = [15293.8560, 15170.0821, 15069.2359, 14973.5297, 14927.3794] +25-08-28 13:22:46 | D | - best error = [15293.8560, 15170.0821, 15069.2359, 14973.5297, 14927.3794] +25-08-28 13:22:46 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 13:22:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:22:46 | D | - sum error = [14863.1022, 14821.1287, 14866.2383, 15045.2209, 15057.4255] +25-08-28 13:22:46 | D | - best error = [14863.1022, 14821.1287, 14821.1287, 14821.1287, 14821.1287] +25-08-28 13:22:46 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 13:22:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:22:46 | D | - sum error = [15175.9863, 15255.5574, 15375.6077, 15597.4598, 15775.8215] +25-08-28 13:22:46 | D | - best error = [14821.1287, 14821.1287, 14821.1287, 14821.1287, 14821.1287] +25-08-28 13:22:46 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:22:46 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:22:46 | D | - sum error = [16106.4603, 16382.3896, 16665.6303, 17028.8311, 17392.4821] +25-08-28 13:22:46 | D | - best error = [14821.1287, 14821.1287, 14821.1287, 14821.1287, 14821.1287] +25-08-28 13:22:46 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 13:22:46 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 13:22:46 | D | - sum error = [18735.9172, 18205.5165, 17725.9755, 17264.8109, 17012.5262] +25-08-28 13:22:46 | D | - best error = [14821.1287, 14821.1287, 14821.1287, 14821.1287, 14821.1287] +25-08-28 13:22:46 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 13:22:46 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 13:22:46 | D | - sum error = [16601.7638, 16407.1818, 16155.7004, 16044.6801, 15972.5550] +25-08-28 13:22:46 | D | - best error = [14821.1287, 14821.1287, 14821.1287, 14821.1287, 14821.1287] +25-08-28 13:22:46 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 13:22:46 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 13:22:46 | D | - sum error = [15931.6621, 15899.6790, 15932.1722, 15976.8317, 16158.7014] +25-08-28 13:22:46 | D | - best error = [14821.1287, 14821.1287, 14821.1287, 14821.1287, 14821.1287] +25-08-28 13:22:46 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:22:46 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 13:22:46 | D | - sum error = [16385.2763, 16624.3571, 16968.2216, 17390.2014] +25-08-28 13:22:46 | D | - best error = [14821.1287, 14821.1287, 14821.1287, 14821.1287] +25-08-28 13:22:46 | D | + error = 14821.1287 +25-08-28 13:22:46 | D | + scale = [min=0.9437, max=2.3447] +25-08-28 13:23:05 | D | - Smoothing Diffusion Block single_transformer_blocks.37 +25-08-28 13:23:05 | D | - Skipping Module single_transformer_blocks.37.norm.linear +25-08-28 13:23:05 | D | - Smoothing Transformer Block single_transformer_blocks.37 +25-08-28 13:23:05 | D | - single_transformer_blocks.37.attn.qkv_proj + single_transformer_blocks.37.up_proj +25-08-28 13:23:05 | D | + w: sint4 +25-08-28 13:23:05 | D | + x: sint4 +25-08-28 13:23:05 | D | + y: None +25-08-28 13:23:05 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:23:05 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 13:23:05 | D | + x - AbsMax +25-08-28 13:23:05 | D | + x = [min=0.2139, max=29.2500] +25-08-28 13:23:05 | D | + w - AbsMax +25-08-28 13:23:05 | D | + w = [min=0.1196, max=0.6016] +25-08-28 13:23:05 | D | + finished resetting calibrator, ram usage: 16.7 +25-08-28 13:23:06 | D | + finished calculating the original outputs, ram usage: 15.5 +25-08-28 13:25:17 | D | - x / w range = AbsMax / AbsMax +25-08-28 13:25:17 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 13:25:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:25:17 | D | - sum error = [52902.8622, 49248.2079, 48128.3404, 44963.4817, 42943.8420] +25-08-28 13:25:17 | D | - best error = [52902.8622, 49248.2079, 48128.3404, 44963.4817, 42943.8420] +25-08-28 13:25:17 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 13:25:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:25:17 | D | - sum error = [39436.7479, 37248.5704, 34831.5664, 33359.6086, 32165.9437] +25-08-28 13:25:17 | D | - best error = [39436.7479, 37248.5704, 34831.5664, 33359.6086, 32165.9437] +25-08-28 13:25:17 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 13:25:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:25:17 | D | - sum error = [30397.1167, 28921.6752, 27817.2367, 27508.9443, 27004.6846] +25-08-28 13:25:17 | D | - best error = [30397.1167, 28921.6752, 27817.2367, 27508.9443, 27004.6846] +25-08-28 13:25:17 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:25:17 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:25:17 | D | - sum error = [26606.9874, 26646.0654, 26837.9969, 27532.1868, 27656.2511] +25-08-28 13:25:17 | D | - best error = [26606.9874, 26606.9874, 26606.9874, 26606.9874, 26606.9874] +25-08-28 13:25:17 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 13:25:17 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 13:25:17 | D | - sum error = [58860.4555, 54818.2720, 51042.7385, 48314.5745, 45806.9689] +25-08-28 13:25:17 | D | - best error = [26606.9874, 26606.9874, 26606.9874, 26606.9874, 26606.9874] +25-08-28 13:25:17 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 13:25:17 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 13:25:17 | D | - sum error = [42167.0279, 40206.1345, 37902.2000, 35283.1473, 33652.8042] +25-08-28 13:25:17 | D | - best error = [26606.9874, 26606.9874, 26606.9874, 26606.9874, 26606.9874] +25-08-28 13:25:17 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 13:25:17 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 13:25:17 | D | - sum error = [32524.3785, 30397.5999, 29162.8213, 29012.2501, 27343.8982] +25-08-28 13:25:17 | D | - best error = [26606.9874, 26606.9874, 26606.9874, 26606.9874, 26606.9874] +25-08-28 13:25:17 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:25:17 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 13:25:17 | D | - sum error = [26685.8320, 26378.8436, 27435.4748, 27930.9285] +25-08-28 13:25:18 | D | - best error = [26606.9874, 26378.8436, 26378.8436, 26378.8436] +25-08-28 13:25:18 | D | + error = 26378.8436 +25-08-28 13:25:18 | D | + scale = [min=0.3561, max=22.0129] +25-08-28 13:25:18 | D | - single_transformer_blocks.37.attn.out_proj +25-08-28 13:25:18 | D | + w: sint4 +25-08-28 13:25:18 | D | + x: sint4 +25-08-28 13:25:18 | D | + y: None +25-08-28 13:25:18 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:25:18 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 13:25:19 | D | + x - AbsMax +25-08-28 13:25:19 | D | + x = [min=0.9375, max=12.0000] +25-08-28 13:25:19 | D | + w - AbsMax +25-08-28 13:25:19 | D | + w = [min=0.0613, max=0.8867] +25-08-28 13:25:19 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 13:25:19 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 13:26:23 | D | - x / w range = AbsMax / AbsMax +25-08-28 13:26:23 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 13:26:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:26:23 | D | - sum error = [ 8651.9861, 8559.7051, 8596.6710, 8421.7433, 8361.3932] +25-08-28 13:26:23 | D | - best error = [ 8651.9861, 8559.7051, 8559.7051, 8421.7433, 8361.3932] +25-08-28 13:26:23 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 13:26:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:26:23 | D | - sum error = [ 8230.8882, 8166.7893, 8213.7913, 8154.9229, 8239.0014] +25-08-28 13:26:23 | D | - best error = [ 8230.8882, 8166.7893, 8166.7893, 8154.9229, 8154.9229] +25-08-28 13:26:23 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 13:26:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:26:23 | D | - sum error = [ 8260.5143, 8352.0345, 8384.0759, 8316.1496, 8235.3385] +25-08-28 13:26:23 | D | - best error = [ 8154.9229, 8154.9229, 8154.9229, 8154.9229, 8154.9229] +25-08-28 13:26:23 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:26:23 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:26:23 | D | - sum error = [ 8146.9094, 8210.8962, 8196.2304, 8313.8251, 8386.3871] +25-08-28 13:26:23 | D | - best error = [ 8146.9094, 8146.9094, 8146.9094, 8146.9094, 8146.9094] +25-08-28 13:26:23 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 13:26:23 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 13:26:23 | D | - sum error = [ 7648.9714, 7546.7871, 7438.6200, 7318.5108, 7204.0089] +25-08-28 13:26:23 | D | - best error = [ 7648.9714, 7546.7871, 7438.6200, 7318.5108, 7204.0089] +25-08-28 13:26:23 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 13:26:23 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 13:26:23 | D | - sum error = [ 7081.6120, 7092.7893, 7017.8136, 7103.7027, 7005.9583] +25-08-28 13:26:23 | D | - best error = [ 7081.6120, 7081.6120, 7017.8136, 7017.8136, 7005.9583] +25-08-28 13:26:23 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 13:26:23 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 13:26:23 | D | - sum error = [ 7105.5117, 7155.3211, 7295.4964, 7547.2066, 7665.1620] +25-08-28 13:26:23 | D | - best error = [ 7005.9583, 7005.9583, 7005.9583, 7005.9583, 7005.9583] +25-08-28 13:26:23 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:26:23 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 13:26:23 | D | - sum error = [ 7748.2881, 7856.3080, 8124.7757, 8299.3846] +25-08-28 13:26:23 | D | - best error = [ 7005.9583, 7005.9583, 7005.9583, 7005.9583] +25-08-28 13:26:23 | D | + error = 7005.9583 +25-08-28 13:26:23 | D | + scale = [min=1.9149, max=10.3145] +25-08-28 13:26:23 | D | - single_transformer_blocks.37.down_proj +25-08-28 13:26:23 | D | + w: sint4 +25-08-28 13:26:23 | D | + x: uint4 +25-08-28 13:26:23 | D | + y: None +25-08-28 13:26:23 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:26:23 | D | + finished parsing calibration arguments, ram usage: 16.6 +25-08-28 13:26:26 | D | + x - AbsMax +25-08-28 13:26:26 | D | + x = [min=2.2188, max=16.6250] +25-08-28 13:26:26 | D | + w - AbsMax +25-08-28 13:26:26 | D | + w = [min=0.0554, max=0.8086] +25-08-28 13:26:26 | D | + finished resetting calibrator, ram usage: 17.0 +25-08-28 13:26:28 | D | + finished calculating the original outputs, ram usage: 17.4 +25-08-28 13:28:27 | D | - x / w range = AbsMax / AbsMax +25-08-28 13:28:27 | D | - alpha = [ 0.0000, 0.0500, 0.1000, 0.1500, 0.2000] +25-08-28 13:28:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:28:27 | D | - sum error = [27200.0800, 26628.4720, 26130.3257, 25444.9593, 24846.6748] +25-08-28 13:28:27 | D | - best error = [27200.0800, 26628.4720, 26130.3257, 25444.9593, 24846.6748] +25-08-28 13:28:27 | D | - alpha = [ 0.2500, 0.3000, 0.3500, 0.4000, 0.4500] +25-08-28 13:28:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:28:27 | D | - sum error = [24385.1916, 24052.1367, 23678.7675, 23320.8782, 22862.9559] +25-08-28 13:28:27 | D | - best error = [24385.1916, 24052.1367, 23678.7675, 23320.8782, 22862.9559] +25-08-28 13:28:27 | D | - alpha = [ 0.5000, 0.5500, 0.6000, 0.6500, 0.7000] +25-08-28 13:28:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:28:27 | D | - sum error = [22483.2781, 22202.9187, 22005.1965, 21716.7084, 21375.8862] +25-08-28 13:28:27 | D | - best error = [22483.2781, 22202.9187, 22005.1965, 21716.7084, 21375.8862] +25-08-28 13:28:27 | D | - alpha = [ 0.7500, 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:28:27 | D | - beta = [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000] +25-08-28 13:28:27 | D | - sum error = [21210.8418, 21137.0742, 20890.7267, 20773.6433, 20821.3736] +25-08-28 13:28:27 | D | - best error = [21210.8418, 21137.0742, 20890.7267, 20773.6433, 20773.6433] +25-08-28 13:28:27 | D | - alpha = [ 0.0500, 0.1000, 0.1500, 0.2000, 0.2500] +25-08-28 13:28:27 | D | - beta = [ 0.9500, 0.9000, 0.8500, 0.8000, 0.7500] +25-08-28 13:28:27 | D | - sum error = [15434.0206, 15073.7048, 15050.9115, 14967.4078, 14889.8236] +25-08-28 13:28:27 | D | - best error = [15434.0206, 15073.7048, 15050.9115, 14967.4078, 14889.8236] +25-08-28 13:28:27 | D | - alpha = [ 0.3000, 0.3500, 0.4000, 0.4500, 0.5000] +25-08-28 13:28:27 | D | - beta = [ 0.7000, 0.6500, 0.6000, 0.5500, 0.5000] +25-08-28 13:28:27 | D | - sum error = [14849.6615, 14909.5116, 15027.2558, 15212.7268, 15328.5470] +25-08-28 13:28:27 | D | - best error = [14849.6615, 14849.6615, 14849.6615, 14849.6615, 14849.6615] +25-08-28 13:28:27 | D | - alpha = [ 0.5500, 0.6000, 0.6500, 0.7000, 0.7500] +25-08-28 13:28:27 | D | - beta = [ 0.4500, 0.4000, 0.3500, 0.3000, 0.2500] +25-08-28 13:28:27 | D | - sum error = [15582.1868, 15896.7086, 16159.7109, 16520.8784, 16996.4401] +25-08-28 13:28:27 | D | - best error = [14849.6615, 14849.6615, 14849.6615, 14849.6615, 14849.6615] +25-08-28 13:28:27 | D | - alpha = [ 0.8000, 0.8500, 0.9000, 0.9500] +25-08-28 13:28:27 | D | - beta = [ 0.2000, 0.1500, 0.1000, 0.0500] +25-08-28 13:28:27 | D | - sum error = [17565.1182, 18341.4809, 19003.8884, 19799.3600] +25-08-28 13:28:27 | D | - best error = [14849.6615, 14849.6615, 14849.6615, 14849.6615] +25-08-28 13:28:27 | D | + error = 14849.6615 +25-08-28 13:28:27 | D | + scale = [min=2.0968, max=13.9965] +25-08-28 13:28:29 | I | - Saving smooth scales to runs/diffusion/cache/quant/qdiff.128/smooth/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/w.skip.[e+rs+rtp+s+tpi+tpo]-x.skip.[e+rs+rtp+s+tan+tn+tpi+tpo]-y.skip.[]/shift/lowrank.r32/smooth.proj.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.proj.[a.AbsMax.b.AbsMax]/smooth.proj.g20.bn2.lr.nf/smooth.proj.skip.[rc+tan+tn]/flux.1-dev-ghibli.pt +25-08-28 13:28:29 | I | - Linking smooth scales to runs/diffusion/flux.1/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000.RUNNING/run-250828.070127.RUNNING/model/smooth.pt +25-08-28 13:28:29 | I | * Quantizing weights +25-08-28 13:28:29 | I | - Generating weight settings +25-08-28 13:28:29 | I | - Generating branch settings +25-08-28 13:28:29 | I | * Adding low-rank branches to weights +25-08-28 13:29:29 | D | - Calibrating low-rank branches of block transformer_blocks.0 +25-08-28 13:29:29 | D | - Calibrating low-rank branch for transformer_blocks.0.attn.to_q, transformer_blocks.0.attn.to_k, transformer_blocks.0.attn.to_v +25-08-28 13:29:29 | D | + w: sint4 +25-08-28 13:29:29 | D | + x: sint4 +25-08-28 13:29:29 | D | + y: None +25-08-28 13:29:29 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:29:29 | D | + finished parsing calibration arguments, ram usage: 19.1 +25-08-28 13:29:29 | D | + finished resetting calibrator, ram usage: 19.1 +25-08-28 13:29:30 | D | + finished calculating the original outputs, ram usage: 19.1 +25-08-28 13:29:43 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 13:29:43 | D | - error = [ 566.2484, 550.0412, 541.4420, 523.0425, 522.2649, 520.9175, 527.4547] +25-08-28 13:29:43 | D | - best error = [ 566.2484, 550.0412, 541.4420, 523.0425, 522.2649, 520.9175, 520.9175] +25-08-28 13:29:43 | D | + Adding low-rank branches to transformer_blocks.0.attn.to_q, transformer_blocks.0.attn.to_k, transformer_blocks.0.attn.to_v +25-08-28 13:29:43 | D | - Calibrating low-rank branch for transformer_blocks.0.attn.add_q_proj, transformer_blocks.0.attn.add_k_proj, transformer_blocks.0.attn.add_v_proj +25-08-28 13:29:43 | D | + w: sint4 +25-08-28 13:29:43 | D | + x: sint4 +25-08-28 13:29:43 | D | + y: None +25-08-28 13:29:43 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:29:43 | D | + finished parsing calibration arguments, ram usage: 19.1 +25-08-28 13:29:43 | D | + finished resetting calibrator, ram usage: 19.1 +25-08-28 13:29:44 | D | + finished calculating the original outputs, ram usage: 19.1 +25-08-28 13:29:52 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-28 13:29:52 | D | - error = [ 527.0578, 519.5223, 508.5588, 508.1465, 510.3070] +25-08-28 13:29:52 | D | - best error = [ 527.0578, 519.5223, 508.5588, 508.1465, 508.1465] +25-08-28 13:29:52 | D | + Adding low-rank branches to transformer_blocks.0.attn.add_q_proj, transformer_blocks.0.attn.add_k_proj, transformer_blocks.0.attn.add_v_proj +25-08-28 13:29:53 | D | - Calibrating low-rank branch for transformer_blocks.0.attn.to_out.0 +25-08-28 13:29:53 | D | + w: sint4 +25-08-28 13:29:53 | D | + x: sint4 +25-08-28 13:29:53 | D | + y: None +25-08-28 13:29:53 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:29:53 | D | + finished parsing calibration arguments, ram usage: 19.1 +25-08-28 13:29:53 | D | + finished resetting calibrator, ram usage: 19.1 +25-08-28 13:29:53 | D | + finished calculating the original outputs, ram usage: 19.1 +25-08-28 13:30:07 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:30:07 | D | - error = [ 990.3302, 949.6958, 926.4794, 908.0543, 894.2818, 885.2997, 880.4703, 879.2790, 873.6486, 868.1801] +25-08-28 13:30:07 | D | - best error = [ 990.3302, 949.6958, 926.4794, 908.0543, 894.2818, 885.2997, 880.4703, 879.2790, 873.6486, 868.1801] +25-08-28 13:30:15 | D | - iter = [ 10, 11, 12, 13, 14, 15] +25-08-28 13:30:15 | D | - error = [ 866.8913, 861.0642, 860.7116, 859.1977, 855.6942, 856.0828] +25-08-28 13:30:15 | D | - best error = [ 866.8913, 861.0642, 860.7116, 859.1977, 855.6942, 855.6942] +25-08-28 13:30:16 | D | + Adding low-rank branches to transformer_blocks.0.attn.to_out.0 +25-08-28 13:30:16 | D | - Calibrating low-rank branch for transformer_blocks.0.attn.to_add_out +25-08-28 13:30:16 | D | + w: sint4 +25-08-28 13:30:16 | D | + x: sint4 +25-08-28 13:30:16 | D | + y: None +25-08-28 13:30:16 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:30:16 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 13:30:16 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 13:30:16 | D | + finished calculating the original outputs, ram usage: 19.1 +25-08-28 13:30:24 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-28 13:30:24 | D | - error = [ 3131.8768, 3105.8184, 3086.4868, 3073.5148, 3061.1488, 3057.4990, 3055.7848, 3059.3481] +25-08-28 13:30:24 | D | - best error = [ 3131.8768, 3105.8184, 3086.4868, 3073.5148, 3061.1488, 3057.4990, 3055.7848, 3055.7848] +25-08-28 13:30:24 | D | + Adding low-rank branches to transformer_blocks.0.attn.to_add_out +25-08-28 13:30:25 | D | - Calibrating low-rank branch for transformer_blocks.0.ff.net.0.proj +25-08-28 13:30:25 | D | + w: sint4 +25-08-28 13:30:25 | D | + x: sint4 +25-08-28 13:30:25 | D | + y: None +25-08-28 13:30:25 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:30:25 | D | + finished parsing calibration arguments, ram usage: 19.1 +25-08-28 13:30:25 | D | + finished resetting calibrator, ram usage: 19.1 +25-08-28 13:30:26 | D | + finished calculating the original outputs, ram usage: 19.1 +25-08-28 13:30:44 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:30:44 | D | - error = [ 1026.0091, 1019.0213, 1013.9311, 1012.6189, 1011.3317, 1010.3810, 1010.2626, 1010.1530, 1009.5852, 1009.0679] +25-08-28 13:30:44 | D | - best error = [ 1026.0091, 1019.0213, 1013.9311, 1012.6189, 1011.3317, 1010.3810, 1010.2626, 1010.1530, 1009.5852, 1009.0679] +25-08-28 13:30:50 | D | - iter = [ 10, 11, 12] +25-08-28 13:30:50 | D | - error = [ 1008.7414, 1008.7367, 1009.3324] +25-08-28 13:30:50 | D | - best error = [ 1008.7414, 1008.7367, 1008.7367] +25-08-28 13:30:50 | D | + Adding low-rank branches to transformer_blocks.0.ff.net.0.proj +25-08-28 13:30:50 | D | - Calibrating low-rank branch for transformer_blocks.0.ff.net.2.linear +25-08-28 13:30:50 | D | + w: sint4 +25-08-28 13:30:50 | D | + x: sint4 +25-08-28 13:30:50 | D | + y: None +25-08-28 13:30:50 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:30:50 | D | + finished parsing calibration arguments, ram usage: 19.1 +25-08-28 13:30:50 | D | + finished resetting calibrator, ram usage: 19.1 +25-08-28 13:30:52 | D | + finished calculating the original outputs, ram usage: 19.1 +25-08-28 13:31:17 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:31:17 | D | - error = [ 2866.5130, 2796.3312, 2754.5413, 2738.6007, 2713.2381, 2700.0942, 2679.6605, 2666.8107, 2657.3622, 2658.4099] +25-08-28 13:31:17 | D | - best error = [ 2866.5130, 2796.3312, 2754.5413, 2738.6007, 2713.2381, 2700.0942, 2679.6605, 2666.8107, 2657.3622, 2657.3622] +25-08-28 13:31:17 | D | + Adding low-rank branches to transformer_blocks.0.ff.net.2.linear +25-08-28 13:31:17 | D | - Calibrating low-rank branch for transformer_blocks.0.ff_context.net.0.proj +25-08-28 13:31:17 | D | + w: sint4 +25-08-28 13:31:17 | D | + x: sint4 +25-08-28 13:31:17 | D | + y: None +25-08-28 13:31:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:31:17 | D | + finished parsing calibration arguments, ram usage: 19.1 +25-08-28 13:31:17 | D | + finished resetting calibrator, ram usage: 19.1 +25-08-28 13:31:18 | D | + finished calculating the original outputs, ram usage: 19.1 +25-08-28 13:31:29 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:31:29 | D | - error = [ 3203.5689, 3144.0304, 3101.7281, 3073.3278, 3050.8946, 3033.7521, 3025.7055, 3012.9831, 3001.0925, 2994.9321] +25-08-28 13:31:29 | D | - best error = [ 3203.5689, 3144.0304, 3101.7281, 3073.3278, 3050.8946, 3033.7521, 3025.7055, 3012.9831, 3001.0925, 2994.9321] +25-08-28 13:31:35 | D | - iter = [ 10, 11, 12, 13, 14] +25-08-28 13:31:35 | D | - error = [ 2990.4903, 2983.0501, 2976.6930, 2975.1990, 2977.0170] +25-08-28 13:31:35 | D | - best error = [ 2990.4903, 2983.0501, 2976.6930, 2975.1990, 2975.1990] +25-08-28 13:31:35 | D | + Adding low-rank branches to transformer_blocks.0.ff_context.net.0.proj +25-08-28 13:31:35 | D | - Calibrating low-rank branch for transformer_blocks.0.ff_context.net.2.linear +25-08-28 13:31:35 | D | + w: sint4 +25-08-28 13:31:35 | D | + x: sint4 +25-08-28 13:31:35 | D | + y: None +25-08-28 13:31:35 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:31:35 | D | + finished parsing calibration arguments, ram usage: 19.1 +25-08-28 13:31:35 | D | + finished resetting calibrator, ram usage: 19.1 +25-08-28 13:31:36 | D | + finished calculating the original outputs, ram usage: 19.1 +25-08-28 13:31:40 | D | - iter = [ 0, 1, 2] +25-08-28 13:31:40 | D | - error = [ 3339.7168, 3324.9313, 3332.4907] +25-08-28 13:31:40 | D | - best error = [ 3339.7168, 3324.9313, 3324.9313] +25-08-28 13:31:40 | D | + Adding low-rank branches to transformer_blocks.0.ff_context.net.2.linear +25-08-28 13:31:57 | D | - Calibrating low-rank branches of block transformer_blocks.1 +25-08-28 13:31:57 | D | - Calibrating low-rank branch for transformer_blocks.1.attn.to_q, transformer_blocks.1.attn.to_k, transformer_blocks.1.attn.to_v +25-08-28 13:31:57 | D | + w: sint4 +25-08-28 13:31:57 | D | + x: sint4 +25-08-28 13:31:57 | D | + y: None +25-08-28 13:31:57 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:31:57 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:31:57 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:31:58 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:32:14 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-28 13:32:14 | D | - error = [ 1109.9494, 1065.2749, 1035.5305, 980.0761, 976.6668, 949.4141, 943.7986, 948.5993] +25-08-28 13:32:14 | D | - best error = [ 1109.9494, 1065.2749, 1035.5305, 980.0761, 976.6668, 949.4141, 943.7986, 943.7986] +25-08-28 13:32:14 | D | + Adding low-rank branches to transformer_blocks.1.attn.to_q, transformer_blocks.1.attn.to_k, transformer_blocks.1.attn.to_v +25-08-28 13:32:14 | D | - Calibrating low-rank branch for transformer_blocks.1.attn.add_q_proj, transformer_blocks.1.attn.add_k_proj, transformer_blocks.1.attn.add_v_proj +25-08-28 13:32:14 | D | + w: sint4 +25-08-28 13:32:14 | D | + x: sint4 +25-08-28 13:32:14 | D | + y: None +25-08-28 13:32:14 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:32:14 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:32:14 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:32:15 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:32:27 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 13:32:27 | D | - error = [ 1082.0340, 1049.7800, 1046.2327, 1042.0086, 1019.7015, 1017.7588, 1028.2841] +25-08-28 13:32:27 | D | - best error = [ 1082.0340, 1049.7800, 1046.2327, 1042.0086, 1019.7015, 1017.7588, 1017.7588] +25-08-28 13:32:27 | D | + Adding low-rank branches to transformer_blocks.1.attn.add_q_proj, transformer_blocks.1.attn.add_k_proj, transformer_blocks.1.attn.add_v_proj +25-08-28 13:32:27 | D | - Calibrating low-rank branch for transformer_blocks.1.attn.to_out.0 +25-08-28 13:32:27 | D | + w: sint4 +25-08-28 13:32:27 | D | + x: sint4 +25-08-28 13:32:27 | D | + y: None +25-08-28 13:32:27 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:32:27 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:32:27 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:32:28 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:32:43 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:32:43 | D | - error = [ 1523.3403, 1475.2679, 1446.9003, 1429.8805, 1417.5364, 1411.5404, 1406.4778, 1403.9687, 1400.1508, 1396.3327] +25-08-28 13:32:43 | D | - best error = [ 1523.3403, 1475.2679, 1446.9003, 1429.8805, 1417.5364, 1411.5404, 1406.4778, 1403.9687, 1400.1508, 1396.3327] +25-08-28 13:32:44 | D | - iter = [ 10] +25-08-28 13:32:44 | D | - error = [ 1397.8864] +25-08-28 13:32:44 | D | - best error = [ 1396.3327] +25-08-28 13:32:44 | D | + Adding low-rank branches to transformer_blocks.1.attn.to_out.0 +25-08-28 13:32:45 | D | - Calibrating low-rank branch for transformer_blocks.1.attn.to_add_out +25-08-28 13:32:45 | D | + w: sint4 +25-08-28 13:32:45 | D | + x: sint4 +25-08-28 13:32:45 | D | + y: None +25-08-28 13:32:45 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:32:45 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:32:45 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:32:45 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:32:52 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 13:32:52 | D | - error = [ 2503.9345, 2470.0694, 2440.3904, 2411.8179, 2410.2188, 2408.6025, 2413.4050] +25-08-28 13:32:52 | D | - best error = [ 2503.9345, 2470.0694, 2440.3904, 2411.8179, 2410.2188, 2408.6025, 2408.6025] +25-08-28 13:32:52 | D | + Adding low-rank branches to transformer_blocks.1.attn.to_add_out +25-08-28 13:32:53 | D | - Calibrating low-rank branch for transformer_blocks.1.ff.net.0.proj +25-08-28 13:32:53 | D | + w: sint4 +25-08-28 13:32:53 | D | + x: sint4 +25-08-28 13:32:53 | D | + y: None +25-08-28 13:32:53 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:32:53 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:32:53 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:32:54 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:33:13 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:33:13 | D | - error = [ 2655.9431, 2638.4412, 2632.9585, 2628.0184, 2624.2950, 2622.5513, 2622.1215, 2621.8372, 2621.0922, 2619.8260] +25-08-28 13:33:13 | D | - best error = [ 2655.9431, 2638.4412, 2632.9585, 2628.0184, 2624.2950, 2622.5513, 2622.1215, 2621.8372, 2621.0922, 2619.8260] +25-08-28 13:33:24 | D | - iter = [ 10, 11, 12, 13, 14, 15] +25-08-28 13:33:24 | D | - error = [ 2618.8889, 2617.7485, 2616.2343, 2615.8400, 2614.8806, 2615.4323] +25-08-28 13:33:24 | D | - best error = [ 2618.8889, 2617.7485, 2616.2343, 2615.8400, 2614.8806, 2614.8806] +25-08-28 13:33:25 | D | + Adding low-rank branches to transformer_blocks.1.ff.net.0.proj +25-08-28 13:33:25 | D | - Calibrating low-rank branch for transformer_blocks.1.ff.net.2.linear +25-08-28 13:33:25 | D | + w: sint4 +25-08-28 13:33:25 | D | + x: sint4 +25-08-28 13:33:25 | D | + y: None +25-08-28 13:33:25 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:33:25 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:33:25 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:33:29 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:33:58 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:33:58 | D | - error = [ 3422.2031, 3366.7465, 3328.7636, 3292.4265, 3279.6472, 3271.3404, 3266.3140, 3259.1250, 3250.5293, 3252.6568] +25-08-28 13:33:58 | D | - best error = [ 3422.2031, 3366.7465, 3328.7636, 3292.4265, 3279.6472, 3271.3404, 3266.3140, 3259.1250, 3250.5293, 3250.5293] +25-08-28 13:33:58 | D | + Adding low-rank branches to transformer_blocks.1.ff.net.2.linear +25-08-28 13:33:58 | D | - Calibrating low-rank branch for transformer_blocks.1.ff_context.net.0.proj +25-08-28 13:33:58 | D | + w: sint4 +25-08-28 13:33:58 | D | + x: sint4 +25-08-28 13:33:58 | D | + y: None +25-08-28 13:33:58 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:33:58 | D | + finished parsing calibration arguments, ram usage: 16.3 +25-08-28 13:33:58 | D | + finished resetting calibrator, ram usage: 16.3 +25-08-28 13:33:59 | D | + finished calculating the original outputs, ram usage: 16.3 +25-08-28 13:34:10 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:34:10 | D | - error = [ 3168.2134, 3131.1790, 3111.8512, 3093.8316, 3080.2833, 3075.5877, 3073.9247, 3070.0982, 3065.7888, 3065.3535] +25-08-28 13:34:10 | D | - best error = [ 3168.2134, 3131.1790, 3111.8512, 3093.8316, 3080.2833, 3075.5877, 3073.9247, 3070.0982, 3065.7888, 3065.3535] +25-08-28 13:34:11 | D | - iter = [ 10] +25-08-28 13:34:11 | D | - error = [ 3066.9886] +25-08-28 13:34:11 | D | - best error = [ 3065.3535] +25-08-28 13:34:11 | D | + Adding low-rank branches to transformer_blocks.1.ff_context.net.0.proj +25-08-28 13:34:11 | D | - Calibrating low-rank branch for transformer_blocks.1.ff_context.net.2.linear +25-08-28 13:34:11 | D | + w: sint4 +25-08-28 13:34:11 | D | + x: sint4 +25-08-28 13:34:11 | D | + y: None +25-08-28 13:34:11 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:34:11 | D | + finished parsing calibration arguments, ram usage: 18.7 +25-08-28 13:34:11 | D | + finished resetting calibrator, ram usage: 18.7 +25-08-28 13:34:12 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:34:16 | D | - iter = [ 0, 1, 2] +25-08-28 13:34:16 | D | - error = [ 1331.7704, 1319.8386, 1325.4208] +25-08-28 13:34:16 | D | - best error = [ 1331.7704, 1319.8386, 1319.8386] +25-08-28 13:34:16 | D | + Adding low-rank branches to transformer_blocks.1.ff_context.net.2.linear +25-08-28 13:34:34 | D | - Calibrating low-rank branches of block transformer_blocks.2 +25-08-28 13:34:34 | D | - Calibrating low-rank branch for transformer_blocks.2.attn.to_q, transformer_blocks.2.attn.to_k, transformer_blocks.2.attn.to_v +25-08-28 13:34:34 | D | + w: sint4 +25-08-28 13:34:34 | D | + x: sint4 +25-08-28 13:34:34 | D | + y: None +25-08-28 13:34:34 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:34:34 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:34:34 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:34:35 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:34:53 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:34:53 | D | - error = [ 2375.3692, 2088.1646, 1987.3650, 1908.5081, 1835.4181, 1794.6694, 1784.1833, 1751.8151, 1714.4327, 1703.4868] +25-08-28 13:34:53 | D | - best error = [ 2375.3692, 2088.1646, 1987.3650, 1908.5081, 1835.4181, 1794.6694, 1784.1833, 1751.8151, 1714.4327, 1703.4868] +25-08-28 13:34:57 | D | - iter = [ 10, 11] +25-08-28 13:34:57 | D | - error = [ 1697.4459, 1706.4431] +25-08-28 13:34:57 | D | - best error = [ 1697.4459, 1697.4459] +25-08-28 13:34:57 | D | + Adding low-rank branches to transformer_blocks.2.attn.to_q, transformer_blocks.2.attn.to_k, transformer_blocks.2.attn.to_v +25-08-28 13:34:57 | D | - Calibrating low-rank branch for transformer_blocks.2.attn.add_q_proj, transformer_blocks.2.attn.add_k_proj, transformer_blocks.2.attn.add_v_proj +25-08-28 13:34:57 | D | + w: sint4 +25-08-28 13:34:57 | D | + x: sint4 +25-08-28 13:34:57 | D | + y: None +25-08-28 13:34:57 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:34:57 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:34:57 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:34:58 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:35:04 | D | - iter = [ 0, 1, 2, 3] +25-08-28 13:35:04 | D | - error = [ 639.8724, 632.2309, 628.2989, 633.4208] +25-08-28 13:35:04 | D | - best error = [ 639.8724, 632.2309, 628.2989, 628.2989] +25-08-28 13:35:05 | D | + Adding low-rank branches to transformer_blocks.2.attn.add_q_proj, transformer_blocks.2.attn.add_k_proj, transformer_blocks.2.attn.add_v_proj +25-08-28 13:35:05 | D | - Calibrating low-rank branch for transformer_blocks.2.attn.to_out.0 +25-08-28 13:35:05 | D | + w: sint4 +25-08-28 13:35:05 | D | + x: sint4 +25-08-28 13:35:05 | D | + y: None +25-08-28 13:35:05 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:35:05 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:35:05 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:35:06 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:35:20 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:35:20 | D | - error = [ 2556.9012, 2477.8576, 2430.6355, 2400.7243, 2382.3396, 2370.3676, 2361.2748, 2356.0001, 2352.2909, 2347.6158] +25-08-28 13:35:20 | D | - best error = [ 2556.9012, 2477.8576, 2430.6355, 2400.7243, 2382.3396, 2370.3676, 2361.2748, 2356.0001, 2352.2909, 2347.6158] +25-08-28 13:35:34 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 13:35:34 | D | - error = [ 2342.0294, 2335.8983, 2333.9010, 2331.0766, 2329.9627, 2328.4675, 2325.7468, 2324.2198, 2323.5772, 2320.8897] +25-08-28 13:35:34 | D | - best error = [ 2342.0294, 2335.8983, 2333.9010, 2331.0766, 2329.9627, 2328.4675, 2325.7468, 2324.2198, 2323.5772, 2320.8897] +25-08-28 13:35:37 | D | - iter = [ 20, 21] +25-08-28 13:35:37 | D | - error = [ 2318.1562, 2321.7118] +25-08-28 13:35:37 | D | - best error = [ 2318.1562, 2318.1562] +25-08-28 13:35:37 | D | + Adding low-rank branches to transformer_blocks.2.attn.to_out.0 +25-08-28 13:35:37 | D | - Calibrating low-rank branch for transformer_blocks.2.attn.to_add_out +25-08-28 13:35:37 | D | + w: sint4 +25-08-28 13:35:37 | D | + x: sint4 +25-08-28 13:35:37 | D | + y: None +25-08-28 13:35:37 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:35:37 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:35:37 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:35:38 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:35:45 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 13:35:45 | D | - error = [ 1074.0380, 1066.1595, 1057.7937, 1054.7118, 1045.2429, 1042.3537, 1042.6189] +25-08-28 13:35:45 | D | - best error = [ 1074.0380, 1066.1595, 1057.7937, 1054.7118, 1045.2429, 1042.3537, 1042.3537] +25-08-28 13:35:45 | D | + Adding low-rank branches to transformer_blocks.2.attn.to_add_out +25-08-28 13:35:45 | D | - Calibrating low-rank branch for transformer_blocks.2.ff.net.0.proj +25-08-28 13:35:45 | D | + w: sint4 +25-08-28 13:35:45 | D | + x: sint4 +25-08-28 13:35:45 | D | + y: None +25-08-28 13:35:45 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:35:45 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:35:45 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:35:47 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:36:06 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:36:06 | D | - error = [ 1869.1690, 1846.1303, 1836.1234, 1830.9598, 1827.4837, 1824.4901, 1823.7530, 1820.7000, 1819.8194, 1818.0602] +25-08-28 13:36:06 | D | - best error = [ 1869.1690, 1846.1303, 1836.1234, 1830.9598, 1827.4837, 1824.4901, 1823.7530, 1820.7000, 1819.8194, 1818.0602] +25-08-28 13:36:08 | D | - iter = [ 10] +25-08-28 13:36:08 | D | - error = [ 1818.9784] +25-08-28 13:36:08 | D | - best error = [ 1818.0602] +25-08-28 13:36:08 | D | + Adding low-rank branches to transformer_blocks.2.ff.net.0.proj +25-08-28 13:36:08 | D | - Calibrating low-rank branch for transformer_blocks.2.ff.net.2.linear +25-08-28 13:36:08 | D | + w: sint4 +25-08-28 13:36:08 | D | + x: sint4 +25-08-28 13:36:08 | D | + y: None +25-08-28 13:36:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:36:08 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:36:08 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:36:10 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:36:35 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:36:35 | D | - error = [ 3516.2681, 3432.0358, 3387.0103, 3349.7654, 3318.4697, 3299.0297, 3284.0341, 3280.3036, 3261.6208, 3245.4781] +25-08-28 13:36:35 | D | - best error = [ 3516.2681, 3432.0358, 3387.0103, 3349.7654, 3318.4697, 3299.0297, 3284.0341, 3280.3036, 3261.6208, 3245.4781] +25-08-28 13:36:40 | D | - iter = [ 10, 11] +25-08-28 13:36:40 | D | - error = [ 3240.9803, 3250.4464] +25-08-28 13:36:40 | D | - best error = [ 3240.9803, 3240.9803] +25-08-28 13:36:40 | D | + Adding low-rank branches to transformer_blocks.2.ff.net.2.linear +25-08-28 13:36:41 | D | - Calibrating low-rank branch for transformer_blocks.2.ff_context.net.0.proj +25-08-28 13:36:41 | D | + w: sint4 +25-08-28 13:36:41 | D | + x: sint4 +25-08-28 13:36:41 | D | + y: None +25-08-28 13:36:41 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:36:41 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:36:41 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:36:41 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:36:53 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:36:53 | D | - error = [ 2526.8311, 2486.6619, 2476.1494, 2462.5948, 2451.9786, 2442.7656, 2436.6762, 2432.1539, 2426.3033, 2426.3144] +25-08-28 13:36:53 | D | - best error = [ 2526.8311, 2486.6619, 2476.1494, 2462.5948, 2451.9786, 2442.7656, 2436.6762, 2432.1539, 2426.3033, 2426.3033] +25-08-28 13:36:53 | D | + Adding low-rank branches to transformer_blocks.2.ff_context.net.0.proj +25-08-28 13:36:53 | D | - Calibrating low-rank branch for transformer_blocks.2.ff_context.net.2.linear +25-08-28 13:36:53 | D | + w: sint4 +25-08-28 13:36:53 | D | + x: sint4 +25-08-28 13:36:53 | D | + y: None +25-08-28 13:36:53 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:36:53 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:36:53 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:36:53 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:37:05 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:37:05 | D | - error = [ 498.5299, 491.8713, 491.0258, 487.9826, 485.3517, 484.8904, 483.9228, 483.3147, 483.0451, 483.8363] +25-08-28 13:37:05 | D | - best error = [ 498.5299, 491.8713, 491.0258, 487.9826, 485.3517, 484.8904, 483.9228, 483.3147, 483.0451, 483.0451] +25-08-28 13:37:06 | D | + Adding low-rank branches to transformer_blocks.2.ff_context.net.2.linear +25-08-28 13:37:24 | D | - Calibrating low-rank branches of block transformer_blocks.3 +25-08-28 13:37:24 | D | - Calibrating low-rank branch for transformer_blocks.3.attn.to_q, transformer_blocks.3.attn.to_k, transformer_blocks.3.attn.to_v +25-08-28 13:37:24 | D | + w: sint4 +25-08-28 13:37:24 | D | + x: sint4 +25-08-28 13:37:24 | D | + y: None +25-08-28 13:37:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:37:24 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:37:24 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:37:25 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:37:40 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-28 13:37:40 | D | - error = [ 2259.7031, 2151.9318, 2115.9602, 2052.0048, 2024.4082, 2013.3020, 1977.9987, 2004.2237] +25-08-28 13:37:40 | D | - best error = [ 2259.7031, 2151.9318, 2115.9602, 2052.0048, 2024.4082, 2013.3020, 1977.9987, 1977.9987] +25-08-28 13:37:40 | D | + Adding low-rank branches to transformer_blocks.3.attn.to_q, transformer_blocks.3.attn.to_k, transformer_blocks.3.attn.to_v +25-08-28 13:37:40 | D | - Calibrating low-rank branch for transformer_blocks.3.attn.add_q_proj, transformer_blocks.3.attn.add_k_proj, transformer_blocks.3.attn.add_v_proj +25-08-28 13:37:40 | D | + w: sint4 +25-08-28 13:37:40 | D | + x: sint4 +25-08-28 13:37:40 | D | + y: None +25-08-28 13:37:40 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:37:40 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:37:40 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:37:41 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:37:52 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 13:37:52 | D | - error = [ 1016.8872, 1005.7387, 1000.4672, 999.5783, 991.7926, 983.1701, 998.7629] +25-08-28 13:37:52 | D | - best error = [ 1016.8872, 1005.7387, 1000.4672, 999.5783, 991.7926, 983.1701, 983.1701] +25-08-28 13:37:53 | D | + Adding low-rank branches to transformer_blocks.3.attn.add_q_proj, transformer_blocks.3.attn.add_k_proj, transformer_blocks.3.attn.add_v_proj +25-08-28 13:37:53 | D | - Calibrating low-rank branch for transformer_blocks.3.attn.to_out.0 +25-08-28 13:37:53 | D | + w: sint4 +25-08-28 13:37:53 | D | + x: sint4 +25-08-28 13:37:53 | D | + y: None +25-08-28 13:37:53 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:37:53 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:37:53 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:37:54 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:38:09 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:38:09 | D | - error = [ 2524.3126, 2466.0784, 2429.8998, 2409.0962, 2393.8795, 2383.6958, 2378.1432, 2372.3004, 2369.5447, 2365.3864] +25-08-28 13:38:09 | D | - best error = [ 2524.3126, 2466.0784, 2429.8998, 2409.0962, 2393.8795, 2383.6958, 2378.1432, 2372.3004, 2369.5447, 2365.3864] +25-08-28 13:38:15 | D | - iter = [ 10, 11, 12, 13] +25-08-28 13:38:15 | D | - error = [ 2362.8218, 2360.3544, 2357.5161, 2358.3367] +25-08-28 13:38:15 | D | - best error = [ 2362.8218, 2360.3544, 2357.5161, 2357.5161] +25-08-28 13:38:15 | D | + Adding low-rank branches to transformer_blocks.3.attn.to_out.0 +25-08-28 13:38:15 | D | - Calibrating low-rank branch for transformer_blocks.3.attn.to_add_out +25-08-28 13:38:15 | D | + w: sint4 +25-08-28 13:38:15 | D | + x: sint4 +25-08-28 13:38:15 | D | + y: None +25-08-28 13:38:15 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:38:15 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 13:38:15 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 13:38:16 | D | + finished calculating the original outputs, ram usage: 19.1 +25-08-28 13:38:26 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:38:26 | D | - error = [ 1191.1754, 1183.7361, 1181.8078, 1177.2821, 1172.3788, 1170.8085, 1169.9387, 1167.7948, 1164.5968, 1165.5600] +25-08-28 13:38:26 | D | - best error = [ 1191.1754, 1183.7361, 1181.8078, 1177.2821, 1172.3788, 1170.8085, 1169.9387, 1167.7948, 1164.5968, 1164.5968] +25-08-28 13:38:26 | D | + Adding low-rank branches to transformer_blocks.3.attn.to_add_out +25-08-28 13:38:26 | D | - Calibrating low-rank branch for transformer_blocks.3.ff.net.0.proj +25-08-28 13:38:26 | D | + w: sint4 +25-08-28 13:38:26 | D | + x: sint4 +25-08-28 13:38:26 | D | + y: None +25-08-28 13:38:26 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:38:26 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:38:26 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:38:27 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:38:47 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:38:47 | D | - error = [ 3449.1026, 3413.1742, 3391.4451, 3383.6251, 3366.5812, 3360.4849, 3356.8566, 3353.0398, 3344.6800, 3343.8846] +25-08-28 13:38:47 | D | - best error = [ 3449.1026, 3413.1742, 3391.4451, 3383.6251, 3366.5812, 3360.4849, 3356.8566, 3353.0398, 3344.6800, 3343.8846] +25-08-28 13:38:51 | D | - iter = [ 10, 11] +25-08-28 13:38:51 | D | - error = [ 3342.6208, 3343.3491] +25-08-28 13:38:51 | D | - best error = [ 3342.6208, 3342.6208] +25-08-28 13:38:51 | D | + Adding low-rank branches to transformer_blocks.3.ff.net.0.proj +25-08-28 13:38:51 | D | - Calibrating low-rank branch for transformer_blocks.3.ff.net.2.linear +25-08-28 13:38:51 | D | + w: sint4 +25-08-28 13:38:51 | D | + x: sint4 +25-08-28 13:38:51 | D | + y: None +25-08-28 13:38:51 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:38:51 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:38:51 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:38:59 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:39:27 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:39:27 | D | - error = [ 4639.9565, 4596.3481, 4567.7115, 4533.2814, 4526.7799, 4522.6789, 4514.3048, 4512.4173, 4504.3055, 4499.5893] +25-08-28 13:39:27 | D | - best error = [ 4639.9565, 4596.3481, 4567.7115, 4533.2814, 4526.7799, 4522.6789, 4514.3048, 4512.4173, 4504.3055, 4499.5893] +25-08-28 13:39:32 | D | - iter = [ 10, 11] +25-08-28 13:39:32 | D | - error = [ 4489.1774, 4492.0060] +25-08-28 13:39:32 | D | - best error = [ 4489.1774, 4489.1774] +25-08-28 13:39:32 | D | + Adding low-rank branches to transformer_blocks.3.ff.net.2.linear +25-08-28 13:39:32 | D | - Calibrating low-rank branch for transformer_blocks.3.ff_context.net.0.proj +25-08-28 13:39:32 | D | + w: sint4 +25-08-28 13:39:32 | D | + x: sint4 +25-08-28 13:39:32 | D | + y: None +25-08-28 13:39:32 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:39:32 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:39:32 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:39:33 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:39:45 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:39:45 | D | - error = [ 1578.0270, 1554.8847, 1533.1982, 1522.3328, 1514.8822, 1512.0745, 1506.9701, 1500.3709, 1497.3371, 1493.3715] +25-08-28 13:39:45 | D | - best error = [ 1578.0270, 1554.8847, 1533.1982, 1522.3328, 1514.8822, 1512.0745, 1506.9701, 1500.3709, 1497.3371, 1493.3715] +25-08-28 13:39:47 | D | - iter = [ 10, 11] +25-08-28 13:39:47 | D | - error = [ 1491.9819, 1492.0146] +25-08-28 13:39:47 | D | - best error = [ 1491.9819, 1491.9819] +25-08-28 13:39:47 | D | + Adding low-rank branches to transformer_blocks.3.ff_context.net.0.proj +25-08-28 13:39:47 | D | - Calibrating low-rank branch for transformer_blocks.3.ff_context.net.2.linear +25-08-28 13:39:47 | D | + w: sint4 +25-08-28 13:39:47 | D | + x: sint4 +25-08-28 13:39:47 | D | + y: None +25-08-28 13:39:47 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:39:47 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:39:47 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:39:49 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:39:56 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-28 13:39:56 | D | - error = [ 1817.4541, 1784.1303, 1753.8807, 1737.0569, 1726.8703, 1727.9483] +25-08-28 13:39:56 | D | - best error = [ 1817.4541, 1784.1303, 1753.8807, 1737.0569, 1726.8703, 1726.8703] +25-08-28 13:39:56 | D | + Adding low-rank branches to transformer_blocks.3.ff_context.net.2.linear +25-08-28 13:40:16 | D | - Calibrating low-rank branches of block transformer_blocks.4 +25-08-28 13:40:16 | D | - Calibrating low-rank branch for transformer_blocks.4.attn.to_q, transformer_blocks.4.attn.to_k, transformer_blocks.4.attn.to_v +25-08-28 13:40:16 | D | + w: sint4 +25-08-28 13:40:16 | D | + x: sint4 +25-08-28 13:40:16 | D | + y: None +25-08-28 13:40:16 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:40:16 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:40:16 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:40:17 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:40:36 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:40:36 | D | - error = [ 3933.7572, 3774.7238, 3675.8455, 3636.1843, 3606.7171, 3572.7032, 3522.0598, 3460.6619, 3452.7134, 3438.1275] +25-08-28 13:40:36 | D | - best error = [ 3933.7572, 3774.7238, 3675.8455, 3636.1843, 3606.7171, 3572.7032, 3522.0598, 3460.6619, 3452.7134, 3438.1275] +25-08-28 13:40:40 | D | - iter = [ 10, 11] +25-08-28 13:40:40 | D | - error = [ 3400.8237, 3446.1818] +25-08-28 13:40:40 | D | - best error = [ 3400.8237, 3400.8237] +25-08-28 13:40:40 | D | + Adding low-rank branches to transformer_blocks.4.attn.to_q, transformer_blocks.4.attn.to_k, transformer_blocks.4.attn.to_v +25-08-28 13:40:40 | D | - Calibrating low-rank branch for transformer_blocks.4.attn.add_q_proj, transformer_blocks.4.attn.add_k_proj, transformer_blocks.4.attn.add_v_proj +25-08-28 13:40:40 | D | + w: sint4 +25-08-28 13:40:40 | D | + x: sint4 +25-08-28 13:40:40 | D | + y: None +25-08-28 13:40:40 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:40:40 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:40:40 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:40:41 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:40:54 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-28 13:40:54 | D | - error = [ 1033.2278, 1013.6720, 990.6572, 987.8942, 984.5852, 983.3724, 978.6837, 979.7548] +25-08-28 13:40:54 | D | - best error = [ 1033.2278, 1013.6720, 990.6572, 987.8942, 984.5852, 983.3724, 978.6837, 978.6837] +25-08-28 13:40:54 | D | + Adding low-rank branches to transformer_blocks.4.attn.add_q_proj, transformer_blocks.4.attn.add_k_proj, transformer_blocks.4.attn.add_v_proj +25-08-28 13:40:54 | D | - Calibrating low-rank branch for transformer_blocks.4.attn.to_out.0 +25-08-28 13:40:54 | D | + w: sint4 +25-08-28 13:40:54 | D | + x: sint4 +25-08-28 13:40:54 | D | + y: None +25-08-28 13:40:54 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:40:54 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:40:54 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:40:55 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:41:10 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:41:10 | D | - error = [ 3821.7505, 3753.7547, 3713.5514, 3684.7255, 3667.7981, 3652.9093, 3644.0648, 3632.8993, 3624.6765, 3619.7464] +25-08-28 13:41:10 | D | - best error = [ 3821.7505, 3753.7547, 3713.5514, 3684.7255, 3667.7981, 3652.9093, 3644.0648, 3632.8993, 3624.6765, 3619.7464] +25-08-28 13:41:24 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 13:41:24 | D | - error = [ 3612.9116, 3609.9353, 3609.5573, 3606.7318, 3604.5280, 3599.9021, 3596.9398, 3595.0257, 3593.0984, 3591.6647] +25-08-28 13:41:24 | D | - best error = [ 3612.9116, 3609.9353, 3609.5573, 3606.7318, 3604.5280, 3599.9021, 3596.9398, 3595.0257, 3593.0984, 3591.6647] +25-08-28 13:41:28 | D | - iter = [ 20, 21, 22] +25-08-28 13:41:28 | D | - error = [ 3589.4828, 3588.9804, 3589.4257] +25-08-28 13:41:28 | D | - best error = [ 3589.4828, 3588.9804, 3588.9804] +25-08-28 13:41:28 | D | + Adding low-rank branches to transformer_blocks.4.attn.to_out.0 +25-08-28 13:41:28 | D | - Calibrating low-rank branch for transformer_blocks.4.attn.to_add_out +25-08-28 13:41:28 | D | + w: sint4 +25-08-28 13:41:28 | D | + x: sint4 +25-08-28 13:41:28 | D | + y: None +25-08-28 13:41:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:41:28 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:41:28 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:41:29 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:41:39 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:41:39 | D | - error = [ 1034.6605, 1024.0032, 1019.6620, 1014.0364, 1011.8409, 1010.1817, 1006.7776, 1004.8252, 1003.5266, 1004.6415] +25-08-28 13:41:39 | D | - best error = [ 1034.6605, 1024.0032, 1019.6620, 1014.0364, 1011.8409, 1010.1817, 1006.7776, 1004.8252, 1003.5266, 1003.5266] +25-08-28 13:41:39 | D | + Adding low-rank branches to transformer_blocks.4.attn.to_add_out +25-08-28 13:41:40 | D | - Calibrating low-rank branch for transformer_blocks.4.ff.net.0.proj +25-08-28 13:41:40 | D | + w: sint4 +25-08-28 13:41:40 | D | + x: sint4 +25-08-28 13:41:40 | D | + y: None +25-08-28 13:41:40 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:41:40 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:41:40 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:41:41 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:42:02 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:42:02 | D | - error = [ 8228.1838, 8166.3240, 8131.4196, 8102.8632, 8081.0660, 8069.2540, 8063.7579, 8056.0785, 8053.2247, 8046.6147] +25-08-28 13:42:02 | D | - best error = [ 8228.1838, 8166.3240, 8131.4196, 8102.8632, 8081.0660, 8069.2540, 8063.7579, 8056.0785, 8053.2247, 8046.6147] +25-08-28 13:42:14 | D | - iter = [ 10, 11, 12, 13, 14, 15] +25-08-28 13:42:14 | D | - error = [ 8043.4853, 8032.6596, 8031.9875, 8023.8629, 8019.7180, 8021.5978] +25-08-28 13:42:14 | D | - best error = [ 8043.4853, 8032.6596, 8031.9875, 8023.8629, 8019.7180, 8019.7180] +25-08-28 13:42:14 | D | + Adding low-rank branches to transformer_blocks.4.ff.net.0.proj +25-08-28 13:42:14 | D | - Calibrating low-rank branch for transformer_blocks.4.ff.net.2.linear +25-08-28 13:42:14 | D | + w: sint4 +25-08-28 13:42:14 | D | + x: sint4 +25-08-28 13:42:14 | D | + y: None +25-08-28 13:42:14 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:42:14 | D | + finished parsing calibration arguments, ram usage: 17.6 +25-08-28 13:42:14 | D | + finished resetting calibrator, ram usage: 17.6 +25-08-28 13:42:25 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:42:38 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-28 13:42:38 | D | - error = [10838.5017, 10795.6676, 10779.3710, 10769.2990, 10773.7073] +25-08-28 13:42:38 | D | - best error = [10838.5017, 10795.6676, 10779.3710, 10769.2990, 10769.2990] +25-08-28 13:42:38 | D | + Adding low-rank branches to transformer_blocks.4.ff.net.2.linear +25-08-28 13:42:38 | D | - Calibrating low-rank branch for transformer_blocks.4.ff_context.net.0.proj +25-08-28 13:42:38 | D | + w: sint4 +25-08-28 13:42:38 | D | + x: sint4 +25-08-28 13:42:38 | D | + y: None +25-08-28 13:42:38 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:42:38 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:42:38 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:42:39 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:42:50 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:42:50 | D | - error = [ 1348.4637, 1320.8365, 1298.9488, 1280.7269, 1270.9299, 1257.0275, 1251.6743, 1247.4604, 1243.1099, 1241.7205] +25-08-28 13:42:50 | D | - best error = [ 1348.4637, 1320.8365, 1298.9488, 1280.7269, 1270.9299, 1257.0275, 1251.6743, 1247.4604, 1243.1099, 1241.7205] +25-08-28 13:42:56 | D | - iter = [ 10, 11, 12, 13, 14] +25-08-28 13:42:56 | D | - error = [ 1238.4189, 1236.3140, 1234.6243, 1232.9810, 1234.1650] +25-08-28 13:42:56 | D | - best error = [ 1238.4189, 1236.3140, 1234.6243, 1232.9810, 1232.9810] +25-08-28 13:42:56 | D | + Adding low-rank branches to transformer_blocks.4.ff_context.net.0.proj +25-08-28 13:42:56 | D | - Calibrating low-rank branch for transformer_blocks.4.ff_context.net.2.linear +25-08-28 13:42:56 | D | + w: sint4 +25-08-28 13:42:57 | D | + x: sint4 +25-08-28 13:42:57 | D | + y: None +25-08-28 13:42:57 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:42:57 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:42:57 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:42:57 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:43:06 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 13:43:06 | D | - error = [ 504.2892, 498.7990, 493.7933, 490.6412, 487.5253, 485.3266, 486.1847] +25-08-28 13:43:06 | D | - best error = [ 504.2892, 498.7990, 493.7933, 490.6412, 487.5253, 485.3266, 485.3266] +25-08-28 13:43:06 | D | + Adding low-rank branches to transformer_blocks.4.ff_context.net.2.linear +25-08-28 13:43:25 | D | - Calibrating low-rank branches of block transformer_blocks.5 +25-08-28 13:43:25 | D | - Calibrating low-rank branch for transformer_blocks.5.attn.to_q, transformer_blocks.5.attn.to_k, transformer_blocks.5.attn.to_v +25-08-28 13:43:25 | D | + w: sint4 +25-08-28 13:43:25 | D | + x: sint4 +25-08-28 13:43:25 | D | + y: None +25-08-28 13:43:25 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:43:25 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:43:25 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:43:26 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:43:38 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-28 13:43:38 | D | - error = [ 3149.9990, 3075.1951, 2981.8244, 2945.3771, 2931.9932, 2933.4069] +25-08-28 13:43:38 | D | - best error = [ 3149.9990, 3075.1951, 2981.8244, 2945.3771, 2931.9932, 2931.9932] +25-08-28 13:43:38 | D | + Adding low-rank branches to transformer_blocks.5.attn.to_q, transformer_blocks.5.attn.to_k, transformer_blocks.5.attn.to_v +25-08-28 13:43:39 | D | - Calibrating low-rank branch for transformer_blocks.5.attn.add_q_proj, transformer_blocks.5.attn.add_k_proj, transformer_blocks.5.attn.add_v_proj +25-08-28 13:43:39 | D | + w: sint4 +25-08-28 13:43:39 | D | + x: sint4 +25-08-28 13:43:39 | D | + y: None +25-08-28 13:43:39 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:43:39 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:43:39 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:43:40 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:43:51 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 13:43:51 | D | - error = [ 626.4973, 616.0746, 611.7523, 609.2446, 607.4004, 605.4557, 606.8719] +25-08-28 13:43:51 | D | - best error = [ 626.4973, 616.0746, 611.7523, 609.2446, 607.4004, 605.4557, 605.4557] +25-08-28 13:43:52 | D | + Adding low-rank branches to transformer_blocks.5.attn.add_q_proj, transformer_blocks.5.attn.add_k_proj, transformer_blocks.5.attn.add_v_proj +25-08-28 13:43:52 | D | - Calibrating low-rank branch for transformer_blocks.5.attn.to_out.0 +25-08-28 13:43:52 | D | + w: sint4 +25-08-28 13:43:52 | D | + x: sint4 +25-08-28 13:43:52 | D | + y: None +25-08-28 13:43:52 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:43:52 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:43:52 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:43:53 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:44:08 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:44:08 | D | - error = [ 4185.4510, 4137.5014, 4105.3061, 4087.3978, 4070.9883, 4057.0504, 4050.6929, 4041.2993, 4035.2787, 4030.7844] +25-08-28 13:44:08 | D | - best error = [ 4185.4510, 4137.5014, 4105.3061, 4087.3978, 4070.9883, 4057.0504, 4050.6929, 4041.2993, 4035.2787, 4030.7844] +25-08-28 13:44:23 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 13:44:23 | D | - error = [ 4028.4399, 4025.5939, 4023.6008, 4019.7639, 4017.8783, 4013.9883, 4009.8237, 4007.7622, 4004.7456, 4003.6751] +25-08-28 13:44:23 | D | - best error = [ 4028.4399, 4025.5939, 4023.6008, 4019.7639, 4017.8783, 4013.9883, 4009.8237, 4007.7622, 4004.7456, 4003.6751] +25-08-28 13:44:38 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 13:44:38 | D | - error = [ 4000.2584, 3999.5810, 3999.0486, 3998.2162, 3995.7381, 3993.9417, 3990.9995, 3990.3197, 3989.9935, 3988.2803] +25-08-28 13:44:38 | D | - best error = [ 4000.2584, 3999.5810, 3999.0486, 3998.2162, 3995.7381, 3993.9417, 3990.9995, 3990.3197, 3989.9935, 3988.2803] +25-08-28 13:44:53 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-28 13:44:53 | D | - error = [ 3986.3531, 3984.5558, 3983.6409, 3983.5470, 3982.7262, 3981.3239, 3980.6128, 3979.3455, 3978.6635, 3979.6078] +25-08-28 13:44:53 | D | - best error = [ 3986.3531, 3984.5558, 3983.6409, 3983.5470, 3982.7262, 3981.3239, 3980.6128, 3979.3455, 3978.6635, 3978.6635] +25-08-28 13:44:53 | D | + Adding low-rank branches to transformer_blocks.5.attn.to_out.0 +25-08-28 13:44:54 | D | - Calibrating low-rank branch for transformer_blocks.5.attn.to_add_out +25-08-28 13:44:54 | D | + w: sint4 +25-08-28 13:44:54 | D | + x: sint4 +25-08-28 13:44:54 | D | + y: None +25-08-28 13:44:54 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:44:54 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:44:54 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:44:54 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:45:04 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:45:04 | D | - error = [ 1151.5536, 1138.1620, 1131.1208, 1125.8839, 1122.7383, 1117.9099, 1114.1010, 1111.3973, 1110.1007, 1108.6207] +25-08-28 13:45:04 | D | - best error = [ 1151.5536, 1138.1620, 1131.1208, 1125.8839, 1122.7383, 1117.9099, 1114.1010, 1111.3973, 1110.1007, 1108.6207] +25-08-28 13:45:12 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17] +25-08-28 13:45:12 | D | - error = [ 1107.6017, 1106.9001, 1105.6219, 1104.2744, 1103.8470, 1103.1573, 1101.6319, 1102.3443] +25-08-28 13:45:12 | D | - best error = [ 1107.6017, 1106.9001, 1105.6219, 1104.2744, 1103.8470, 1103.1573, 1101.6319, 1101.6319] +25-08-28 13:45:12 | D | + Adding low-rank branches to transformer_blocks.5.attn.to_add_out +25-08-28 13:45:13 | D | - Calibrating low-rank branch for transformer_blocks.5.ff.net.0.proj +25-08-28 13:45:13 | D | + w: sint4 +25-08-28 13:45:13 | D | + x: sint4 +25-08-28 13:45:13 | D | + y: None +25-08-28 13:45:13 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:45:13 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:45:13 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:45:14 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:45:35 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:45:35 | D | - error = [ 7321.3321, 7238.5164, 7173.5520, 7126.0177, 7098.4083, 7075.7462, 7044.2286, 7033.0867, 7020.8601, 7011.0972] +25-08-28 13:45:35 | D | - best error = [ 7321.3321, 7238.5164, 7173.5520, 7126.0177, 7098.4083, 7075.7462, 7044.2286, 7033.0867, 7020.8601, 7011.0972] +25-08-28 13:45:56 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 13:45:56 | D | - error = [ 7000.9874, 6991.0920, 6991.0348, 6981.3864, 6977.5405, 6974.6925, 6971.6552, 6969.4668, 6965.9509, 6961.8038] +25-08-28 13:45:56 | D | - best error = [ 7000.9874, 6991.0920, 6991.0348, 6981.3864, 6977.5405, 6974.6925, 6971.6552, 6969.4668, 6965.9509, 6961.8038] +25-08-28 13:45:58 | D | - iter = [ 20] +25-08-28 13:45:58 | D | - error = [ 6964.2318] +25-08-28 13:45:58 | D | - best error = [ 6961.8038] +25-08-28 13:45:58 | D | + Adding low-rank branches to transformer_blocks.5.ff.net.0.proj +25-08-28 13:45:58 | D | - Calibrating low-rank branch for transformer_blocks.5.ff.net.2.linear +25-08-28 13:45:58 | D | + w: sint4 +25-08-28 13:45:58 | D | + x: sint4 +25-08-28 13:45:58 | D | + y: None +25-08-28 13:45:58 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:45:58 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:45:58 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:46:03 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:46:28 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-28 13:46:28 | D | - error = [10789.7511, 10731.9717, 10719.9362, 10704.3685, 10665.5164, 10643.7813, 10628.6688, 10634.0036] +25-08-28 13:46:28 | D | - best error = [10789.7511, 10731.9717, 10719.9362, 10704.3685, 10665.5164, 10643.7813, 10628.6688, 10628.6688] +25-08-28 13:46:28 | D | + Adding low-rank branches to transformer_blocks.5.ff.net.2.linear +25-08-28 13:46:28 | D | - Calibrating low-rank branch for transformer_blocks.5.ff_context.net.0.proj +25-08-28 13:46:28 | D | + w: sint4 +25-08-28 13:46:28 | D | + x: sint4 +25-08-28 13:46:28 | D | + y: None +25-08-28 13:46:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:46:28 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:46:28 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:46:29 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:46:40 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:46:40 | D | - error = [ 1232.5768, 1206.7078, 1185.6029, 1172.7149, 1163.2460, 1157.5621, 1150.4354, 1145.1500, 1140.7808, 1138.2970] +25-08-28 13:46:40 | D | - best error = [ 1232.5768, 1206.7078, 1185.6029, 1172.7149, 1163.2460, 1157.5621, 1150.4354, 1145.1500, 1140.7808, 1138.2970] +25-08-28 13:46:49 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17] +25-08-28 13:46:49 | D | - error = [ 1137.8285, 1134.3698, 1133.0711, 1131.4803, 1130.1664, 1128.3629, 1126.8814, 1127.1660] +25-08-28 13:46:49 | D | - best error = [ 1137.8285, 1134.3698, 1133.0711, 1131.4803, 1130.1664, 1128.3629, 1126.8814, 1126.8814] +25-08-28 13:46:50 | D | + Adding low-rank branches to transformer_blocks.5.ff_context.net.0.proj +25-08-28 13:46:50 | D | - Calibrating low-rank branch for transformer_blocks.5.ff_context.net.2.linear +25-08-28 13:46:50 | D | + w: sint4 +25-08-28 13:46:50 | D | + x: sint4 +25-08-28 13:46:50 | D | + y: None +25-08-28 13:46:50 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:46:50 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:46:50 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:46:51 | D | + finished calculating the original outputs, ram usage: 19.0 +25-08-28 13:47:03 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:47:03 | D | - error = [ 494.6635, 486.4195, 480.4748, 476.7672, 473.3591, 471.7944, 471.7747, 468.5432, 467.5478, 467.4065] +25-08-28 13:47:03 | D | - best error = [ 494.6635, 486.4195, 480.4748, 476.7672, 473.3591, 471.7944, 471.7747, 468.5432, 467.5478, 467.4065] +25-08-28 13:47:04 | D | - iter = [ 10] +25-08-28 13:47:04 | D | - error = [ 469.0354] +25-08-28 13:47:04 | D | - best error = [ 467.4065] +25-08-28 13:47:05 | D | + Adding low-rank branches to transformer_blocks.5.ff_context.net.2.linear +25-08-28 13:47:23 | D | - Calibrating low-rank branches of block transformer_blocks.6 +25-08-28 13:47:23 | D | - Calibrating low-rank branch for transformer_blocks.6.attn.to_q, transformer_blocks.6.attn.to_k, transformer_blocks.6.attn.to_v +25-08-28 13:47:23 | D | + w: sint4 +25-08-28 13:47:23 | D | + x: sint4 +25-08-28 13:47:23 | D | + y: None +25-08-28 13:47:23 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:47:23 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:47:23 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:47:24 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:47:38 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 13:47:38 | D | - error = [ 2764.8152, 2569.6481, 2478.7943, 2430.7280, 2399.3207, 2372.9408, 2381.1665] +25-08-28 13:47:38 | D | - best error = [ 2764.8152, 2569.6481, 2478.7943, 2430.7280, 2399.3207, 2372.9408, 2372.9408] +25-08-28 13:47:38 | D | + Adding low-rank branches to transformer_blocks.6.attn.to_q, transformer_blocks.6.attn.to_k, transformer_blocks.6.attn.to_v +25-08-28 13:47:38 | D | - Calibrating low-rank branch for transformer_blocks.6.attn.add_q_proj, transformer_blocks.6.attn.add_k_proj, transformer_blocks.6.attn.add_v_proj +25-08-28 13:47:38 | D | + w: sint4 +25-08-28 13:47:38 | D | + x: sint4 +25-08-28 13:47:38 | D | + y: None +25-08-28 13:47:38 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:47:38 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:47:38 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:47:39 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:47:49 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-28 13:47:49 | D | - error = [ 1276.0685, 1247.3732, 1245.7792, 1238.0003, 1234.1891, 1236.2700] +25-08-28 13:47:49 | D | - best error = [ 1276.0685, 1247.3732, 1245.7792, 1238.0003, 1234.1891, 1234.1891] +25-08-28 13:47:49 | D | + Adding low-rank branches to transformer_blocks.6.attn.add_q_proj, transformer_blocks.6.attn.add_k_proj, transformer_blocks.6.attn.add_v_proj +25-08-28 13:47:49 | D | - Calibrating low-rank branch for transformer_blocks.6.attn.to_out.0 +25-08-28 13:47:49 | D | + w: sint4 +25-08-28 13:47:50 | D | + x: sint4 +25-08-28 13:47:50 | D | + y: None +25-08-28 13:47:50 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:47:50 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:47:50 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:47:51 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:48:05 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:48:05 | D | - error = [ 4348.5503, 4296.3589, 4258.2034, 4232.4981, 4214.4831, 4200.2333, 4187.4730, 4176.7578, 4170.4000, 4165.9186] +25-08-28 13:48:05 | D | - best error = [ 4348.5503, 4296.3589, 4258.2034, 4232.4981, 4214.4831, 4200.2333, 4187.4730, 4176.7578, 4170.4000, 4165.9186] +25-08-28 13:48:21 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 13:48:21 | D | - error = [ 4158.4062, 4153.3220, 4149.0037, 4146.8569, 4141.3300, 4139.1323, 4135.6301, 4134.2332, 4131.8439, 4130.1177] +25-08-28 13:48:21 | D | - best error = [ 4158.4062, 4153.3220, 4149.0037, 4146.8569, 4141.3300, 4139.1323, 4135.6301, 4134.2332, 4131.8439, 4130.1177] +25-08-28 13:48:34 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28] +25-08-28 13:48:34 | D | - error = [ 4129.2944, 4128.6637, 4126.9517, 4126.7699, 4123.5679, 4121.3481, 4120.9797, 4120.5580, 4121.3930] +25-08-28 13:48:34 | D | - best error = [ 4129.2944, 4128.6637, 4126.9517, 4126.7699, 4123.5679, 4121.3481, 4120.9797, 4120.5580, 4120.5580] +25-08-28 13:48:34 | D | + Adding low-rank branches to transformer_blocks.6.attn.to_out.0 +25-08-28 13:48:35 | D | - Calibrating low-rank branch for transformer_blocks.6.attn.to_add_out +25-08-28 13:48:35 | D | + w: sint4 +25-08-28 13:48:35 | D | + x: sint4 +25-08-28 13:48:35 | D | + y: None +25-08-28 13:48:35 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:48:35 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:48:35 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:48:35 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:48:45 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:48:45 | D | - error = [ 1470.0777, 1452.6439, 1445.7371, 1435.3461, 1426.7797, 1421.0862, 1415.7458, 1415.4464, 1413.6811, 1410.1428] +25-08-28 13:48:45 | D | - best error = [ 1470.0777, 1452.6439, 1445.7371, 1435.3461, 1426.7797, 1421.0862, 1415.7458, 1415.4464, 1413.6811, 1410.1428] +25-08-28 13:48:51 | D | - iter = [ 10, 11, 12, 13, 14, 15] +25-08-28 13:48:51 | D | - error = [ 1407.5384, 1406.7231, 1403.3041, 1402.6303, 1401.7124, 1403.9572] +25-08-28 13:48:51 | D | - best error = [ 1407.5384, 1406.7231, 1403.3041, 1402.6303, 1401.7124, 1401.7124] +25-08-28 13:48:52 | D | + Adding low-rank branches to transformer_blocks.6.attn.to_add_out +25-08-28 13:48:52 | D | - Calibrating low-rank branch for transformer_blocks.6.ff.net.0.proj +25-08-28 13:48:52 | D | + w: sint4 +25-08-28 13:48:52 | D | + x: sint4 +25-08-28 13:48:52 | D | + y: None +25-08-28 13:48:52 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:48:52 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:48:52 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:48:53 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:49:14 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:49:14 | D | - error = [ 7114.6634, 7007.0427, 6944.5582, 6898.4236, 6857.6535, 6829.2963, 6812.2666, 6798.5696, 6783.3935, 6773.2285] +25-08-28 13:49:14 | D | - best error = [ 7114.6634, 7007.0427, 6944.5582, 6898.4236, 6857.6535, 6829.2963, 6812.2666, 6798.5696, 6783.3935, 6773.2285] +25-08-28 13:49:35 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 13:49:35 | D | - error = [ 6766.0304, 6758.5907, 6752.0290, 6746.6661, 6741.4930, 6735.5190, 6731.9014, 6728.3517, 6726.2700, 6723.2528] +25-08-28 13:49:35 | D | - best error = [ 6766.0304, 6758.5907, 6752.0290, 6746.6661, 6741.4930, 6735.5190, 6731.9014, 6728.3517, 6726.2700, 6723.2528] +25-08-28 13:49:42 | D | - iter = [ 20, 21, 22, 23] +25-08-28 13:49:42 | D | - error = [ 6722.4734, 6718.1438, 6715.2352, 6716.8728] +25-08-28 13:49:42 | D | - best error = [ 6722.4734, 6718.1438, 6715.2352, 6715.2352] +25-08-28 13:49:43 | D | + Adding low-rank branches to transformer_blocks.6.ff.net.0.proj +25-08-28 13:49:43 | D | - Calibrating low-rank branch for transformer_blocks.6.ff.net.2.linear +25-08-28 13:49:43 | D | + w: sint4 +25-08-28 13:49:43 | D | + x: sint4 +25-08-28 13:49:43 | D | + y: None +25-08-28 13:49:43 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:49:43 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:49:43 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:49:47 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:50:09 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 13:50:09 | D | - error = [12097.0839, 12077.8263, 12049.2795, 12038.2358, 12024.3846, 12003.0283, 12003.8580] +25-08-28 13:50:09 | D | - best error = [12097.0839, 12077.8263, 12049.2795, 12038.2358, 12024.3846, 12003.0283, 12003.0283] +25-08-28 13:50:09 | D | + Adding low-rank branches to transformer_blocks.6.ff.net.2.linear +25-08-28 13:50:09 | D | - Calibrating low-rank branch for transformer_blocks.6.ff_context.net.0.proj +25-08-28 13:50:09 | D | + w: sint4 +25-08-28 13:50:09 | D | + x: sint4 +25-08-28 13:50:09 | D | + y: None +25-08-28 13:50:09 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:50:09 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:50:09 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:50:10 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:50:21 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:50:21 | D | - error = [ 1270.0327, 1236.5667, 1217.5214, 1200.8175, 1191.0027, 1178.9006, 1173.8244, 1166.5504, 1162.2151, 1160.1348] +25-08-28 13:50:21 | D | - best error = [ 1270.0327, 1236.5667, 1217.5214, 1200.8175, 1191.0027, 1178.9006, 1173.8244, 1166.5504, 1162.2151, 1160.1348] +25-08-28 13:50:23 | D | - iter = [ 10, 11] +25-08-28 13:50:23 | D | - error = [ 1157.1859, 1157.4990] +25-08-28 13:50:23 | D | - best error = [ 1157.1859, 1157.1859] +25-08-28 13:50:23 | D | + Adding low-rank branches to transformer_blocks.6.ff_context.net.0.proj +25-08-28 13:50:24 | D | - Calibrating low-rank branch for transformer_blocks.6.ff_context.net.2.linear +25-08-28 13:50:24 | D | + w: sint4 +25-08-28 13:50:24 | D | + x: sint4 +25-08-28 13:50:24 | D | + y: None +25-08-28 13:50:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:50:24 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:50:24 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:50:24 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:50:37 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:50:37 | D | - error = [ 575.5659, 564.3311, 561.7214, 559.6295, 556.9710, 554.0203, 550.1475, 547.8255, 544.4292, 544.5811] +25-08-28 13:50:37 | D | - best error = [ 575.5659, 564.3311, 561.7214, 559.6295, 556.9710, 554.0203, 550.1475, 547.8255, 544.4292, 544.4292] +25-08-28 13:50:37 | D | + Adding low-rank branches to transformer_blocks.6.ff_context.net.2.linear +25-08-28 13:50:56 | D | - Calibrating low-rank branches of block transformer_blocks.7 +25-08-28 13:50:56 | D | - Calibrating low-rank branch for transformer_blocks.7.attn.to_q, transformer_blocks.7.attn.to_k, transformer_blocks.7.attn.to_v +25-08-28 13:50:56 | D | + w: sint4 +25-08-28 13:50:56 | D | + x: sint4 +25-08-28 13:50:56 | D | + y: None +25-08-28 13:50:56 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:50:56 | D | + finished parsing calibration arguments, ram usage: 16.1 +25-08-28 13:50:56 | D | + finished resetting calibrator, ram usage: 16.1 +25-08-28 13:50:57 | D | + finished calculating the original outputs, ram usage: 16.2 +25-08-28 13:51:09 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-28 13:51:09 | D | - error = [ 3341.1435, 3083.3764, 2980.2679, 2925.5018, 2889.1504, 2897.2497] +25-08-28 13:51:09 | D | - best error = [ 3341.1435, 3083.3764, 2980.2679, 2925.5018, 2889.1504, 2889.1504] +25-08-28 13:51:09 | D | + Adding low-rank branches to transformer_blocks.7.attn.to_q, transformer_blocks.7.attn.to_k, transformer_blocks.7.attn.to_v +25-08-28 13:51:09 | D | - Calibrating low-rank branch for transformer_blocks.7.attn.add_q_proj, transformer_blocks.7.attn.add_k_proj, transformer_blocks.7.attn.add_v_proj +25-08-28 13:51:09 | D | + w: sint4 +25-08-28 13:51:09 | D | + x: sint4 +25-08-28 13:51:09 | D | + y: None +25-08-28 13:51:09 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:51:09 | D | + finished parsing calibration arguments, ram usage: 18.5 +25-08-28 13:51:09 | D | + finished resetting calibrator, ram usage: 18.5 +25-08-28 13:51:10 | D | + finished calculating the original outputs, ram usage: 18.7 +25-08-28 13:51:20 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-28 13:51:20 | D | - error = [ 1000.6022, 974.3517, 964.2062, 959.9792, 956.7545, 960.5864] +25-08-28 13:51:20 | D | - best error = [ 1000.6022, 974.3517, 964.2062, 959.9792, 956.7545, 956.7545] +25-08-28 13:51:20 | D | + Adding low-rank branches to transformer_blocks.7.attn.add_q_proj, transformer_blocks.7.attn.add_k_proj, transformer_blocks.7.attn.add_v_proj +25-08-28 13:51:21 | D | - Calibrating low-rank branch for transformer_blocks.7.attn.to_out.0 +25-08-28 13:51:21 | D | + w: sint4 +25-08-28 13:51:21 | D | + x: sint4 +25-08-28 13:51:21 | D | + y: None +25-08-28 13:51:21 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:51:21 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:51:21 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:51:22 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:51:37 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:51:37 | D | - error = [ 5783.2873, 5717.6123, 5672.1610, 5644.5783, 5623.8087, 5603.7714, 5590.4824, 5579.2150, 5569.3351, 5558.6621] +25-08-28 13:51:37 | D | - best error = [ 5783.2873, 5717.6123, 5672.1610, 5644.5783, 5623.8087, 5603.7714, 5590.4824, 5579.2150, 5569.3351, 5558.6621] +25-08-28 13:51:52 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 13:51:52 | D | - error = [ 5551.6109, 5544.5534, 5538.5487, 5535.1983, 5532.8776, 5530.4564, 5526.5787, 5524.2922, 5523.2724, 5518.0285] +25-08-28 13:51:52 | D | - best error = [ 5551.6109, 5544.5534, 5538.5487, 5535.1983, 5532.8776, 5530.4564, 5526.5787, 5524.2922, 5523.2724, 5518.0285] +25-08-28 13:52:07 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 13:52:07 | D | - error = [ 5515.3390, 5513.3072, 5511.4967, 5510.7074, 5509.1916, 5505.5574, 5502.9817, 5500.6380, 5500.3180, 5501.0406] +25-08-28 13:52:07 | D | - best error = [ 5515.3390, 5513.3072, 5511.4967, 5510.7074, 5509.1916, 5505.5574, 5502.9817, 5500.6380, 5500.3180, 5500.3180] +25-08-28 13:52:07 | D | + Adding low-rank branches to transformer_blocks.7.attn.to_out.0 +25-08-28 13:52:07 | D | - Calibrating low-rank branch for transformer_blocks.7.attn.to_add_out +25-08-28 13:52:07 | D | + w: sint4 +25-08-28 13:52:07 | D | + x: sint4 +25-08-28 13:52:07 | D | + y: None +25-08-28 13:52:07 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:52:07 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:52:07 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:52:08 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:52:17 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-28 13:52:17 | D | - error = [ 1348.1829, 1329.2924, 1319.5035, 1315.8967, 1307.2192, 1303.1728, 1301.1094, 1301.0197, 1302.1831] +25-08-28 13:52:17 | D | - best error = [ 1348.1829, 1329.2924, 1319.5035, 1315.8967, 1307.2192, 1303.1728, 1301.1094, 1301.0197, 1301.0197] +25-08-28 13:52:17 | D | + Adding low-rank branches to transformer_blocks.7.attn.to_add_out +25-08-28 13:52:17 | D | - Calibrating low-rank branch for transformer_blocks.7.ff.net.0.proj +25-08-28 13:52:17 | D | + w: sint4 +25-08-28 13:52:17 | D | + x: sint4 +25-08-28 13:52:17 | D | + y: None +25-08-28 13:52:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:52:17 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:52:17 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:52:19 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:52:40 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:52:40 | D | - error = [ 6949.5732, 6869.7846, 6814.0120, 6773.8261, 6748.6627, 6726.0214, 6711.7917, 6701.8439, 6688.8268, 6679.7105] +25-08-28 13:52:40 | D | - best error = [ 6949.5732, 6869.7846, 6814.0120, 6773.8261, 6748.6627, 6726.0214, 6711.7917, 6701.8439, 6688.8268, 6679.7105] +25-08-28 13:53:00 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 13:53:00 | D | - error = [ 6671.2655, 6666.6591, 6661.1795, 6655.7407, 6650.5332, 6647.6721, 6642.5543, 6638.7960, 6635.6088, 6630.7858] +25-08-28 13:53:00 | D | - best error = [ 6671.2655, 6666.6591, 6661.1795, 6655.7407, 6650.5332, 6647.6721, 6642.5543, 6638.7960, 6635.6088, 6630.7858] +25-08-28 13:53:22 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 13:53:22 | D | - error = [ 6629.9315, 6628.9118, 6626.7971, 6624.2156, 6621.7300, 6621.4249, 6619.4964, 6618.9727, 6617.7924, 6616.9821] +25-08-28 13:53:22 | D | - best error = [ 6629.9315, 6628.9118, 6626.7971, 6624.2156, 6621.7300, 6621.4249, 6619.4964, 6618.9727, 6617.7924, 6616.9821] +25-08-28 13:53:32 | D | - iter = [ 30, 31, 32, 33] +25-08-28 13:53:32 | D | - error = [ 6616.2515, 6614.6825, 6612.6470, 6613.1802] +25-08-28 13:53:32 | D | - best error = [ 6616.2515, 6614.6825, 6612.6470, 6612.6470] +25-08-28 13:53:32 | D | + Adding low-rank branches to transformer_blocks.7.ff.net.0.proj +25-08-28 13:53:32 | D | - Calibrating low-rank branch for transformer_blocks.7.ff.net.2.linear +25-08-28 13:53:32 | D | + w: sint4 +25-08-28 13:53:32 | D | + x: sint4 +25-08-28 13:53:32 | D | + y: None +25-08-28 13:53:32 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:53:32 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:53:32 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:53:37 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:53:55 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-28 13:53:55 | D | - error = [14290.8972, 14265.3062, 14239.3489, 14230.8527, 14205.7217, 14207.0788] +25-08-28 13:53:55 | D | - best error = [14290.8972, 14265.3062, 14239.3489, 14230.8527, 14205.7217, 14205.7217] +25-08-28 13:53:55 | D | + Adding low-rank branches to transformer_blocks.7.ff.net.2.linear +25-08-28 13:53:55 | D | - Calibrating low-rank branch for transformer_blocks.7.ff_context.net.0.proj +25-08-28 13:53:55 | D | + w: sint4 +25-08-28 13:53:55 | D | + x: sint4 +25-08-28 13:53:55 | D | + y: None +25-08-28 13:53:55 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:53:55 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:53:55 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:53:56 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:54:07 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:54:07 | D | - error = [ 1285.2890, 1251.5261, 1226.5712, 1213.3409, 1201.3053, 1192.5099, 1182.4299, 1177.9103, 1171.5508, 1168.2920] +25-08-28 13:54:07 | D | - best error = [ 1285.2890, 1251.5261, 1226.5712, 1213.3409, 1201.3053, 1192.5099, 1182.4299, 1177.9103, 1171.5508, 1168.2920] +25-08-28 13:54:19 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 13:54:19 | D | - error = [ 1163.8840, 1161.3417, 1159.0001, 1158.3414, 1155.3346, 1154.3801, 1153.6377, 1151.9661, 1150.6006, 1150.0972] +25-08-28 13:54:19 | D | - best error = [ 1163.8840, 1161.3417, 1159.0001, 1158.3414, 1155.3346, 1154.3801, 1153.6377, 1151.9661, 1150.6006, 1150.0972] +25-08-28 13:54:21 | D | - iter = [ 20, 21] +25-08-28 13:54:21 | D | - error = [ 1149.5582, 1150.1168] +25-08-28 13:54:21 | D | - best error = [ 1149.5582, 1149.5582] +25-08-28 13:54:21 | D | + Adding low-rank branches to transformer_blocks.7.ff_context.net.0.proj +25-08-28 13:54:22 | D | - Calibrating low-rank branch for transformer_blocks.7.ff_context.net.2.linear +25-08-28 13:54:22 | D | + w: sint4 +25-08-28 13:54:22 | D | + x: sint4 +25-08-28 13:54:22 | D | + y: None +25-08-28 13:54:22 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:54:22 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:54:22 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:54:22 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:54:31 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 13:54:31 | D | - error = [ 616.0388, 607.5933, 603.4434, 596.9358, 592.2102, 590.9484, 592.1820] +25-08-28 13:54:31 | D | - best error = [ 616.0388, 607.5933, 603.4434, 596.9358, 592.2102, 590.9484, 590.9484] +25-08-28 13:54:32 | D | + Adding low-rank branches to transformer_blocks.7.ff_context.net.2.linear +25-08-28 13:54:50 | D | - Calibrating low-rank branches of block transformer_blocks.8 +25-08-28 13:54:50 | D | - Calibrating low-rank branch for transformer_blocks.8.attn.to_q, transformer_blocks.8.attn.to_k, transformer_blocks.8.attn.to_v +25-08-28 13:54:50 | D | + w: sint4 +25-08-28 13:54:50 | D | + x: sint4 +25-08-28 13:54:50 | D | + y: None +25-08-28 13:54:50 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:54:50 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:54:50 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:54:51 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:55:08 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-28 13:55:08 | D | - error = [ 3397.3707, 3151.8345, 3011.9094, 2967.5818, 2919.7152, 2915.3048, 2886.7515, 2876.3680, 2878.6789] +25-08-28 13:55:08 | D | - best error = [ 3397.3707, 3151.8345, 3011.9094, 2967.5818, 2919.7152, 2915.3048, 2886.7515, 2876.3680, 2876.3680] +25-08-28 13:55:08 | D | + Adding low-rank branches to transformer_blocks.8.attn.to_q, transformer_blocks.8.attn.to_k, transformer_blocks.8.attn.to_v +25-08-28 13:55:08 | D | - Calibrating low-rank branch for transformer_blocks.8.attn.add_q_proj, transformer_blocks.8.attn.add_k_proj, transformer_blocks.8.attn.add_v_proj +25-08-28 13:55:08 | D | + w: sint4 +25-08-28 13:55:08 | D | + x: sint4 +25-08-28 13:55:08 | D | + y: None +25-08-28 13:55:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:55:08 | D | + finished parsing calibration arguments, ram usage: 16.6 +25-08-28 13:55:08 | D | + finished resetting calibrator, ram usage: 16.6 +25-08-28 13:55:09 | D | + finished calculating the original outputs, ram usage: 16.8 +25-08-28 13:55:21 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 13:55:21 | D | - error = [ 1292.3495, 1265.7589, 1261.9891, 1258.2755, 1251.7895, 1245.1683, 1246.8508] +25-08-28 13:55:21 | D | - best error = [ 1292.3495, 1265.7589, 1261.9891, 1258.2755, 1251.7895, 1245.1683, 1245.1683] +25-08-28 13:55:21 | D | + Adding low-rank branches to transformer_blocks.8.attn.add_q_proj, transformer_blocks.8.attn.add_k_proj, transformer_blocks.8.attn.add_v_proj +25-08-28 13:55:22 | D | - Calibrating low-rank branch for transformer_blocks.8.attn.to_out.0 +25-08-28 13:55:22 | D | + w: sint4 +25-08-28 13:55:22 | D | + x: sint4 +25-08-28 13:55:22 | D | + y: None +25-08-28 13:55:22 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:55:22 | D | + finished parsing calibration arguments, ram usage: 19.0 +25-08-28 13:55:22 | D | + finished resetting calibrator, ram usage: 19.0 +25-08-28 13:55:23 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 13:55:38 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:55:38 | D | - error = [ 7111.4882, 7031.2827, 6978.3052, 6940.2758, 6915.7880, 6896.3602, 6875.7784, 6861.6214, 6852.3794, 6844.2764] +25-08-28 13:55:38 | D | - best error = [ 7111.4882, 7031.2827, 6978.3052, 6940.2758, 6915.7880, 6896.3602, 6875.7784, 6861.6214, 6852.3794, 6844.2764] +25-08-28 13:55:52 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 13:55:52 | D | - error = [ 6834.0963, 6828.2724, 6820.6856, 6814.3352, 6811.8403, 6806.5563, 6801.3193, 6798.5119, 6793.0300, 6789.9776] +25-08-28 13:55:52 | D | - best error = [ 6834.0963, 6828.2724, 6820.6856, 6814.3352, 6811.8403, 6806.5563, 6801.3193, 6798.5119, 6793.0300, 6789.9776] +25-08-28 13:56:08 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 13:56:08 | D | - error = [ 6783.9741, 6783.7276, 6781.6234, 6780.6041, 6778.7589, 6774.8662, 6773.2843, 6772.4338, 6771.7741, 6769.0153] +25-08-28 13:56:08 | D | - best error = [ 6783.9741, 6783.7276, 6781.6234, 6780.6041, 6778.7589, 6774.8662, 6773.2843, 6772.4338, 6771.7741, 6769.0153] +25-08-28 13:56:14 | D | - iter = [ 30, 31, 32, 33] +25-08-28 13:56:14 | D | - error = [ 6766.8114, 6765.8942, 6765.6281, 6765.8150] +25-08-28 13:56:14 | D | - best error = [ 6766.8114, 6765.8942, 6765.6281, 6765.6281] +25-08-28 13:56:14 | D | + Adding low-rank branches to transformer_blocks.8.attn.to_out.0 +25-08-28 13:56:14 | D | - Calibrating low-rank branch for transformer_blocks.8.attn.to_add_out +25-08-28 13:56:14 | D | + w: sint4 +25-08-28 13:56:14 | D | + x: sint4 +25-08-28 13:56:14 | D | + y: None +25-08-28 13:56:14 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:56:14 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 13:56:14 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 13:56:15 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 13:56:25 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:56:25 | D | - error = [ 1468.3838, 1456.8019, 1444.7845, 1436.1645, 1429.3952, 1428.2387, 1426.2824, 1421.0906, 1418.4431, 1416.7021] +25-08-28 13:56:25 | D | - best error = [ 1468.3838, 1456.8019, 1444.7845, 1436.1645, 1429.3952, 1428.2387, 1426.2824, 1421.0906, 1418.4431, 1416.7021] +25-08-28 13:56:29 | D | - iter = [ 10, 11, 12, 13] +25-08-28 13:56:29 | D | - error = [ 1412.2406, 1411.6037, 1409.3875, 1411.6258] +25-08-28 13:56:29 | D | - best error = [ 1412.2406, 1411.6037, 1409.3875, 1409.3875] +25-08-28 13:56:29 | D | + Adding low-rank branches to transformer_blocks.8.attn.to_add_out +25-08-28 13:56:29 | D | - Calibrating low-rank branch for transformer_blocks.8.ff.net.0.proj +25-08-28 13:56:29 | D | + w: sint4 +25-08-28 13:56:29 | D | + x: sint4 +25-08-28 13:56:29 | D | + y: None +25-08-28 13:56:29 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:56:29 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 13:56:29 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 13:56:31 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 13:56:51 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:56:51 | D | - error = [ 6705.4816, 6616.7070, 6560.9088, 6516.0451, 6487.8785, 6465.7652, 6448.0071, 6434.9222, 6420.7788, 6411.5689] +25-08-28 13:56:51 | D | - best error = [ 6705.4816, 6616.7070, 6560.9088, 6516.0451, 6487.8785, 6465.7652, 6448.0071, 6434.9222, 6420.7788, 6411.5689] +25-08-28 13:57:12 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 13:57:12 | D | - error = [ 6403.7136, 6396.3502, 6389.0309, 6384.1438, 6379.6724, 6376.1233, 6372.8970, 6370.2797, 6364.2115, 6362.3188] +25-08-28 13:57:12 | D | - best error = [ 6403.7136, 6396.3502, 6389.0309, 6384.1438, 6379.6724, 6376.1233, 6372.8970, 6370.2797, 6364.2115, 6362.3188] +25-08-28 13:57:32 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 13:57:32 | D | - error = [ 6360.6371, 6358.6712, 6356.6822, 6354.7116, 6351.6858, 6348.9943, 6348.0539, 6345.6546, 6345.0595, 6344.2746] +25-08-28 13:57:32 | D | - best error = [ 6360.6371, 6358.6712, 6356.6822, 6354.7116, 6351.6858, 6348.9943, 6348.0539, 6345.6546, 6345.0595, 6344.2746] +25-08-28 13:57:47 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36] +25-08-28 13:57:47 | D | - error = [ 6342.6219, 6342.1662, 6341.6515, 6340.8851, 6340.2899, 6339.9987, 6340.1290] +25-08-28 13:57:47 | D | - best error = [ 6342.6219, 6342.1662, 6341.6515, 6340.8851, 6340.2899, 6339.9987, 6339.9987] +25-08-28 13:57:47 | D | + Adding low-rank branches to transformer_blocks.8.ff.net.0.proj +25-08-28 13:57:47 | D | - Calibrating low-rank branch for transformer_blocks.8.ff.net.2.linear +25-08-28 13:57:47 | D | + w: sint4 +25-08-28 13:57:47 | D | + x: sint4 +25-08-28 13:57:47 | D | + y: None +25-08-28 13:57:47 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:57:47 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 13:57:47 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 13:57:52 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 13:58:23 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:58:23 | D | - error = [15080.7830, 15035.1279, 15016.5168, 15004.5294, 15000.1243, 14991.5779, 14981.3933, 14964.6463, 14962.4977, 14966.0319] +25-08-28 13:58:23 | D | - best error = [15080.7830, 15035.1279, 15016.5168, 15004.5294, 15000.1243, 14991.5779, 14981.3933, 14964.6463, 14962.4977, 14962.4977] +25-08-28 13:58:24 | D | + Adding low-rank branches to transformer_blocks.8.ff.net.2.linear +25-08-28 13:58:24 | D | - Calibrating low-rank branch for transformer_blocks.8.ff_context.net.0.proj +25-08-28 13:58:24 | D | + w: sint4 +25-08-28 13:58:24 | D | + x: sint4 +25-08-28 13:58:24 | D | + y: None +25-08-28 13:58:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:58:24 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 13:58:24 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 13:58:24 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 13:58:36 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:58:36 | D | - error = [ 1540.5523, 1505.0783, 1485.3466, 1466.5713, 1454.0663, 1447.0143, 1437.4687, 1430.0371, 1424.1407, 1419.6075] +25-08-28 13:58:36 | D | - best error = [ 1540.5523, 1505.0783, 1485.3466, 1466.5713, 1454.0663, 1447.0143, 1437.4687, 1430.0371, 1424.1407, 1419.6075] +25-08-28 13:58:45 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17] +25-08-28 13:58:45 | D | - error = [ 1415.0418, 1414.4521, 1411.0020, 1408.3676, 1406.5799, 1404.0439, 1401.5889, 1401.7617] +25-08-28 13:58:45 | D | - best error = [ 1415.0418, 1414.4521, 1411.0020, 1408.3676, 1406.5799, 1404.0439, 1401.5889, 1401.5889] +25-08-28 13:58:45 | D | + Adding low-rank branches to transformer_blocks.8.ff_context.net.0.proj +25-08-28 13:58:45 | D | - Calibrating low-rank branch for transformer_blocks.8.ff_context.net.2.linear +25-08-28 13:58:45 | D | + w: sint4 +25-08-28 13:58:45 | D | + x: sint4 +25-08-28 13:58:45 | D | + y: None +25-08-28 13:58:45 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:58:45 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 13:58:45 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 13:58:46 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 13:58:59 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 13:58:59 | D | - error = [ 551.1957, 539.2042, 533.4496, 529.0681, 524.3997, 523.7017, 523.1815, 522.8111, 521.1802, 521.5641] +25-08-28 13:58:59 | D | - best error = [ 551.1957, 539.2042, 533.4496, 529.0681, 524.3997, 523.7017, 523.1815, 522.8111, 521.1802, 521.1802] +25-08-28 13:58:59 | D | + Adding low-rank branches to transformer_blocks.8.ff_context.net.2.linear +25-08-28 13:59:17 | D | - Calibrating low-rank branches of block transformer_blocks.9 +25-08-28 13:59:17 | D | - Calibrating low-rank branch for transformer_blocks.9.attn.to_q, transformer_blocks.9.attn.to_k, transformer_blocks.9.attn.to_v +25-08-28 13:59:17 | D | + w: sint4 +25-08-28 13:59:17 | D | + x: sint4 +25-08-28 13:59:17 | D | + y: None +25-08-28 13:59:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:59:17 | D | + finished parsing calibration arguments, ram usage: 16.0 +25-08-28 13:59:17 | D | + finished resetting calibrator, ram usage: 16.0 +25-08-28 13:59:19 | D | + finished calculating the original outputs, ram usage: 16.2 +25-08-28 13:59:35 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-28 13:59:35 | D | - error = [ 3551.3799, 3457.0662, 3407.0235, 3379.2251, 3361.2379, 3341.1839, 3334.0615, 3307.3792, 3317.1736] +25-08-28 13:59:35 | D | - best error = [ 3551.3799, 3457.0662, 3407.0235, 3379.2251, 3361.2379, 3341.1839, 3334.0615, 3307.3792, 3307.3792] +25-08-28 13:59:36 | D | + Adding low-rank branches to transformer_blocks.9.attn.to_q, transformer_blocks.9.attn.to_k, transformer_blocks.9.attn.to_v +25-08-28 13:59:36 | D | - Calibrating low-rank branch for transformer_blocks.9.attn.add_q_proj, transformer_blocks.9.attn.add_k_proj, transformer_blocks.9.attn.add_v_proj +25-08-28 13:59:36 | D | + w: sint4 +25-08-28 13:59:36 | D | + x: sint4 +25-08-28 13:59:36 | D | + y: None +25-08-28 13:59:36 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:59:36 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:59:36 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:59:37 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 13:59:47 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-28 13:59:47 | D | - error = [ 1144.7548, 1119.4335, 1106.5008, 1095.7941, 1091.2425, 1097.4974] +25-08-28 13:59:47 | D | - best error = [ 1144.7548, 1119.4335, 1106.5008, 1095.7941, 1091.2425, 1091.2425] +25-08-28 13:59:47 | D | + Adding low-rank branches to transformer_blocks.9.attn.add_q_proj, transformer_blocks.9.attn.add_k_proj, transformer_blocks.9.attn.add_v_proj +25-08-28 13:59:47 | D | - Calibrating low-rank branch for transformer_blocks.9.attn.to_out.0 +25-08-28 13:59:47 | D | + w: sint4 +25-08-28 13:59:47 | D | + x: sint4 +25-08-28 13:59:47 | D | + y: None +25-08-28 13:59:47 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 13:59:47 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 13:59:47 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 13:59:49 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 14:00:03 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:00:03 | D | - error = [ 5506.3487, 5453.8218, 5427.5584, 5400.4679, 5379.2488, 5367.0700, 5358.9648, 5349.5680, 5343.8406, 5336.7979] +25-08-28 14:00:03 | D | - best error = [ 5506.3487, 5453.8218, 5427.5584, 5400.4679, 5379.2488, 5367.0700, 5358.9648, 5349.5680, 5343.8406, 5336.7979] +25-08-28 14:00:18 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:00:18 | D | - error = [ 5328.1755, 5324.8579, 5321.3855, 5316.1975, 5315.4353, 5311.6103, 5310.2979, 5309.9966, 5306.4408, 5304.8885] +25-08-28 14:00:18 | D | - best error = [ 5328.1755, 5324.8579, 5321.3855, 5316.1975, 5315.4353, 5311.6103, 5310.2979, 5309.9966, 5306.4408, 5304.8885] +25-08-28 14:00:26 | D | - iter = [ 20, 21, 22, 23, 24] +25-08-28 14:00:26 | D | - error = [ 5302.1784, 5301.1244, 5301.0297, 5298.0349, 5298.2782] +25-08-28 14:00:26 | D | - best error = [ 5302.1784, 5301.1244, 5301.0297, 5298.0349, 5298.0349] +25-08-28 14:00:26 | D | + Adding low-rank branches to transformer_blocks.9.attn.to_out.0 +25-08-28 14:00:26 | D | - Calibrating low-rank branch for transformer_blocks.9.attn.to_add_out +25-08-28 14:00:26 | D | + w: sint4 +25-08-28 14:00:26 | D | + x: sint4 +25-08-28 14:00:26 | D | + y: None +25-08-28 14:00:26 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:00:26 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 14:00:26 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 14:00:27 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 14:00:37 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:00:37 | D | - error = [ 1395.2028, 1383.6656, 1374.0860, 1365.0339, 1363.1752, 1356.9096, 1352.9409, 1349.3242, 1349.0181, 1347.9168] +25-08-28 14:00:37 | D | - best error = [ 1395.2028, 1383.6656, 1374.0860, 1365.0339, 1363.1752, 1356.9096, 1352.9409, 1349.3242, 1349.0181, 1347.9168] +25-08-28 14:00:40 | D | - iter = [ 10, 11, 12] +25-08-28 14:00:40 | D | - error = [ 1347.5123, 1347.3835, 1349.0330] +25-08-28 14:00:40 | D | - best error = [ 1347.5123, 1347.3835, 1347.3835] +25-08-28 14:00:40 | D | + Adding low-rank branches to transformer_blocks.9.attn.to_add_out +25-08-28 14:00:40 | D | - Calibrating low-rank branch for transformer_blocks.9.ff.net.0.proj +25-08-28 14:00:40 | D | + w: sint4 +25-08-28 14:00:40 | D | + x: sint4 +25-08-28 14:00:40 | D | + y: None +25-08-28 14:00:40 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:00:40 | D | + finished parsing calibration arguments, ram usage: 18.8 +25-08-28 14:00:40 | D | + finished resetting calibrator, ram usage: 18.8 +25-08-28 14:00:42 | D | + finished calculating the original outputs, ram usage: 18.8 +25-08-28 14:01:03 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:01:03 | D | - error = [ 6954.9608, 6866.0044, 6806.1807, 6765.2973, 6736.4041, 6710.3326, 6692.4405, 6679.0640, 6665.9265, 6655.9020] +25-08-28 14:01:03 | D | - best error = [ 6954.9608, 6866.0044, 6806.1807, 6765.2973, 6736.4041, 6710.3326, 6692.4405, 6679.0640, 6665.9265, 6655.9020] +25-08-28 14:01:23 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:01:23 | D | - error = [ 6647.4813, 6640.0009, 6632.4502, 6626.0712, 6622.9851, 6616.1206, 6611.4915, 6607.7203, 6604.3894, 6601.2523] +25-08-28 14:01:23 | D | - best error = [ 6647.4813, 6640.0009, 6632.4502, 6626.0712, 6622.9851, 6616.1206, 6611.4915, 6607.7203, 6604.3894, 6601.2523] +25-08-28 14:01:44 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 14:01:44 | D | - error = [ 6597.9560, 6596.6558, 6594.6178, 6593.2832, 6589.7080, 6587.5722, 6586.6759, 6583.6485, 6583.3605, 6581.1768] +25-08-28 14:01:44 | D | - best error = [ 6597.9560, 6596.6558, 6594.6178, 6593.2832, 6589.7080, 6587.5722, 6586.6759, 6583.6485, 6583.3605, 6581.1768] +25-08-28 14:02:05 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-28 14:02:05 | D | - error = [ 6579.6641, 6579.2212, 6578.9160, 6578.0529, 6576.9564, 6575.8078, 6575.0027, 6574.3502, 6574.2616, 6572.1683] +25-08-28 14:02:05 | D | - best error = [ 6579.6641, 6579.2212, 6578.9160, 6578.0529, 6576.9564, 6575.8078, 6575.0027, 6574.3502, 6574.2616, 6572.1683] +25-08-28 14:02:23 | D | - iter = [ 40, 41, 42, 43, 44, 45, 46, 47, 48] +25-08-28 14:02:23 | D | - error = [ 6571.3542, 6570.4904, 6569.7288, 6569.4462, 6568.8599, 6568.6967, 6567.8855, 6567.1699, 6568.1242] +25-08-28 14:02:23 | D | - best error = [ 6571.3542, 6570.4904, 6569.7288, 6569.4462, 6568.8599, 6568.6967, 6567.8855, 6567.1699, 6567.1699] +25-08-28 14:02:24 | D | + Adding low-rank branches to transformer_blocks.9.ff.net.0.proj +25-08-28 14:02:24 | D | - Calibrating low-rank branch for transformer_blocks.9.ff.net.2.linear +25-08-28 14:02:24 | D | + w: sint4 +25-08-28 14:02:24 | D | + x: sint4 +25-08-28 14:02:24 | D | + y: None +25-08-28 14:02:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:02:24 | D | + finished parsing calibration arguments, ram usage: 17.3 +25-08-28 14:02:24 | D | + finished resetting calibrator, ram usage: 17.3 +25-08-28 14:02:28 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:02:47 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-28 14:02:47 | D | - error = [14140.6003, 14099.0303, 14070.3444, 14048.2232, 14046.3780, 14047.1589] +25-08-28 14:02:47 | D | - best error = [14140.6003, 14099.0303, 14070.3444, 14048.2232, 14046.3780, 14046.3780] +25-08-28 14:02:47 | D | + Adding low-rank branches to transformer_blocks.9.ff.net.2.linear +25-08-28 14:02:47 | D | - Calibrating low-rank branch for transformer_blocks.9.ff_context.net.0.proj +25-08-28 14:02:47 | D | + w: sint4 +25-08-28 14:02:47 | D | + x: sint4 +25-08-28 14:02:47 | D | + y: None +25-08-28 14:02:47 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:02:47 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:02:47 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:02:48 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:02:59 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:02:59 | D | - error = [ 1510.0875, 1473.9347, 1451.5676, 1432.4308, 1419.3246, 1406.1491, 1399.4268, 1394.0790, 1388.0222, 1380.6915] +25-08-28 14:02:59 | D | - best error = [ 1510.0875, 1473.9347, 1451.5676, 1432.4308, 1419.3246, 1406.1491, 1399.4268, 1394.0790, 1388.0222, 1380.6915] +25-08-28 14:03:10 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:03:10 | D | - error = [ 1377.2064, 1374.2664, 1371.4963, 1368.9784, 1366.9485, 1366.8756, 1364.3358, 1363.1065, 1361.7480, 1360.7491] +25-08-28 14:03:10 | D | - best error = [ 1377.2064, 1374.2664, 1371.4963, 1368.9784, 1366.9485, 1366.8756, 1364.3358, 1363.1065, 1361.7480, 1360.7491] +25-08-28 14:03:14 | D | - iter = [ 20, 21, 22] +25-08-28 14:03:14 | D | - error = [ 1359.3940, 1357.3268, 1357.7933] +25-08-28 14:03:14 | D | - best error = [ 1359.3940, 1357.3268, 1357.3268] +25-08-28 14:03:14 | D | + Adding low-rank branches to transformer_blocks.9.ff_context.net.0.proj +25-08-28 14:03:14 | D | - Calibrating low-rank branch for transformer_blocks.9.ff_context.net.2.linear +25-08-28 14:03:14 | D | + w: sint4 +25-08-28 14:03:14 | D | + x: sint4 +25-08-28 14:03:14 | D | + y: None +25-08-28 14:03:14 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:03:14 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:03:14 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:03:15 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:03:27 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:03:27 | D | - error = [ 2710.2857, 2680.5898, 2650.0177, 2634.4210, 2618.7685, 2612.4766, 2600.2343, 2598.5591, 2596.4595, 2591.9948] +25-08-28 14:03:27 | D | - best error = [ 2710.2857, 2680.5898, 2650.0177, 2634.4210, 2618.7685, 2612.4766, 2600.2343, 2598.5591, 2596.4595, 2591.9948] +25-08-28 14:03:36 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16] +25-08-28 14:03:36 | D | - error = [ 2589.4586, 2587.8672, 2584.3622, 2579.5348, 2579.4616, 2576.5396, 2576.6007] +25-08-28 14:03:36 | D | - best error = [ 2589.4586, 2587.8672, 2584.3622, 2579.5348, 2579.4616, 2576.5396, 2576.5396] +25-08-28 14:03:36 | D | + Adding low-rank branches to transformer_blocks.9.ff_context.net.2.linear +25-08-28 14:03:55 | D | - Calibrating low-rank branches of block transformer_blocks.10 +25-08-28 14:03:55 | D | - Calibrating low-rank branch for transformer_blocks.10.attn.to_q, transformer_blocks.10.attn.to_k, transformer_blocks.10.attn.to_v +25-08-28 14:03:55 | D | + w: sint4 +25-08-28 14:03:55 | D | + x: sint4 +25-08-28 14:03:55 | D | + y: None +25-08-28 14:03:55 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:03:55 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:03:55 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:03:56 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:04:15 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:04:15 | D | - error = [ 4037.1395, 3932.2628, 3859.3064, 3849.3581, 3822.3507, 3811.6461, 3796.9548, 3792.6453, 3785.5601, 3780.8771] +25-08-28 14:04:15 | D | - best error = [ 4037.1395, 3932.2628, 3859.3064, 3849.3581, 3822.3507, 3811.6461, 3796.9548, 3792.6453, 3785.5601, 3780.8771] +25-08-28 14:04:17 | D | - iter = [ 10] +25-08-28 14:04:17 | D | - error = [ 3783.7701] +25-08-28 14:04:17 | D | - best error = [ 3780.8771] +25-08-28 14:04:17 | D | + Adding low-rank branches to transformer_blocks.10.attn.to_q, transformer_blocks.10.attn.to_k, transformer_blocks.10.attn.to_v +25-08-28 14:04:17 | D | - Calibrating low-rank branch for transformer_blocks.10.attn.add_q_proj, transformer_blocks.10.attn.add_k_proj, transformer_blocks.10.attn.add_v_proj +25-08-28 14:04:17 | D | + w: sint4 +25-08-28 14:04:17 | D | + x: sint4 +25-08-28 14:04:17 | D | + y: None +25-08-28 14:04:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:04:17 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:04:17 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:04:18 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:04:27 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-28 14:04:27 | D | - error = [ 1723.9300, 1705.8000, 1689.6114, 1682.4831, 1690.3193] +25-08-28 14:04:27 | D | - best error = [ 1723.9300, 1705.8000, 1689.6114, 1682.4831, 1682.4831] +25-08-28 14:04:27 | D | + Adding low-rank branches to transformer_blocks.10.attn.add_q_proj, transformer_blocks.10.attn.add_k_proj, transformer_blocks.10.attn.add_v_proj +25-08-28 14:04:27 | D | - Calibrating low-rank branch for transformer_blocks.10.attn.to_out.0 +25-08-28 14:04:27 | D | + w: sint4 +25-08-28 14:04:27 | D | + x: sint4 +25-08-28 14:04:27 | D | + y: None +25-08-28 14:04:27 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:04:27 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:04:27 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:04:29 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:04:44 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:04:44 | D | - error = [ 6653.1382, 6585.8889, 6551.0124, 6520.3994, 6493.0449, 6472.4635, 6456.6607, 6442.9332, 6430.7953, 6423.3845] +25-08-28 14:04:44 | D | - best error = [ 6653.1382, 6585.8889, 6551.0124, 6520.3994, 6493.0449, 6472.4635, 6456.6607, 6442.9332, 6430.7953, 6423.3845] +25-08-28 14:04:56 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17] +25-08-28 14:04:56 | D | - error = [ 6416.4651, 6411.5075, 6406.5554, 6399.7037, 6395.7896, 6391.6361, 6388.2401, 6389.8053] +25-08-28 14:04:56 | D | - best error = [ 6416.4651, 6411.5075, 6406.5554, 6399.7037, 6395.7896, 6391.6361, 6388.2401, 6388.2401] +25-08-28 14:04:56 | D | + Adding low-rank branches to transformer_blocks.10.attn.to_out.0 +25-08-28 14:04:56 | D | - Calibrating low-rank branch for transformer_blocks.10.attn.to_add_out +25-08-28 14:04:56 | D | + w: sint4 +25-08-28 14:04:56 | D | + x: sint4 +25-08-28 14:04:56 | D | + y: None +25-08-28 14:04:56 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:04:56 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:04:56 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:04:57 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:05:07 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:05:07 | D | - error = [ 1662.8350, 1647.3452, 1627.2683, 1618.8444, 1612.6233, 1607.7224, 1605.4749, 1601.6879, 1599.0736, 1597.1762] +25-08-28 14:05:07 | D | - best error = [ 1662.8350, 1647.3452, 1627.2683, 1618.8444, 1612.6233, 1607.7224, 1605.4749, 1601.6879, 1599.0736, 1597.1762] +25-08-28 14:05:13 | D | - iter = [ 10, 11, 12, 13, 14, 15] +25-08-28 14:05:13 | D | - error = [ 1594.3506, 1590.9701, 1588.9065, 1583.4844, 1582.2621, 1583.6935] +25-08-28 14:05:13 | D | - best error = [ 1594.3506, 1590.9701, 1588.9065, 1583.4844, 1582.2621, 1582.2621] +25-08-28 14:05:13 | D | + Adding low-rank branches to transformer_blocks.10.attn.to_add_out +25-08-28 14:05:13 | D | - Calibrating low-rank branch for transformer_blocks.10.ff.net.0.proj +25-08-28 14:05:13 | D | + w: sint4 +25-08-28 14:05:13 | D | + x: sint4 +25-08-28 14:05:13 | D | + y: None +25-08-28 14:05:13 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:05:13 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:05:13 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:05:15 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:05:36 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:05:36 | D | - error = [ 7132.4088, 7035.6805, 6975.4644, 6930.0196, 6896.7865, 6871.9834, 6852.5117, 6835.8420, 6821.1606, 6811.7581] +25-08-28 14:05:36 | D | - best error = [ 7132.4088, 7035.6805, 6975.4644, 6930.0196, 6896.7865, 6871.9834, 6852.5117, 6835.8420, 6821.1606, 6811.7581] +25-08-28 14:05:56 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:05:56 | D | - error = [ 6799.9165, 6789.9694, 6784.5253, 6778.0124, 6773.9779, 6765.1356, 6760.6056, 6756.6696, 6752.0649, 6749.4901] +25-08-28 14:05:56 | D | - best error = [ 6799.9165, 6789.9694, 6784.5253, 6778.0124, 6773.9779, 6765.1356, 6760.6056, 6756.6696, 6752.0649, 6749.4901] +25-08-28 14:06:17 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 14:06:17 | D | - error = [ 6743.0153, 6740.9240, 6738.7091, 6737.6963, 6735.6862, 6732.9117, 6731.4117, 6730.9411, 6729.5025, 6729.9560] +25-08-28 14:06:17 | D | - best error = [ 6743.0153, 6740.9240, 6738.7091, 6737.6963, 6735.6862, 6732.9117, 6731.4117, 6730.9411, 6729.5025, 6729.5025] +25-08-28 14:06:17 | D | + Adding low-rank branches to transformer_blocks.10.ff.net.0.proj +25-08-28 14:06:17 | D | - Calibrating low-rank branch for transformer_blocks.10.ff.net.2.linear +25-08-28 14:06:17 | D | + w: sint4 +25-08-28 14:06:17 | D | + x: sint4 +25-08-28 14:06:17 | D | + y: None +25-08-28 14:06:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:06:17 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:06:17 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:06:22 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:06:37 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-28 14:06:37 | D | - error = [12636.9756, 12635.0965, 12613.9497, 12595.0759, 12596.1937] +25-08-28 14:06:37 | D | - best error = [12636.9756, 12635.0965, 12613.9497, 12595.0759, 12595.0759] +25-08-28 14:06:38 | D | + Adding low-rank branches to transformer_blocks.10.ff.net.2.linear +25-08-28 14:06:38 | D | - Calibrating low-rank branch for transformer_blocks.10.ff_context.net.0.proj +25-08-28 14:06:38 | D | + w: sint4 +25-08-28 14:06:38 | D | + x: sint4 +25-08-28 14:06:38 | D | + y: None +25-08-28 14:06:38 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:06:38 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:06:38 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:06:38 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:06:50 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:06:50 | D | - error = [ 3292.7854, 3270.5605, 3265.9626, 3255.5052, 3250.8620, 3249.8326, 3247.1641, 3243.1209, 3240.6922, 3235.3807] +25-08-28 14:06:50 | D | - best error = [ 3292.7854, 3270.5605, 3265.9626, 3255.5052, 3250.8620, 3249.8326, 3247.1641, 3243.1209, 3240.6922, 3235.3807] +25-08-28 14:06:52 | D | - iter = [ 10, 11] +25-08-28 14:06:52 | D | - error = [ 3234.0853, 3235.5701] +25-08-28 14:06:52 | D | - best error = [ 3234.0853, 3234.0853] +25-08-28 14:06:52 | D | + Adding low-rank branches to transformer_blocks.10.ff_context.net.0.proj +25-08-28 14:06:52 | D | - Calibrating low-rank branch for transformer_blocks.10.ff_context.net.2.linear +25-08-28 14:06:52 | D | + w: sint4 +25-08-28 14:06:52 | D | + x: sint4 +25-08-28 14:06:52 | D | + y: None +25-08-28 14:06:52 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:06:52 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:06:52 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:06:53 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:06:59 | D | - iter = [ 0, 1, 2, 3] +25-08-28 14:06:59 | D | - error = [ 4041.2857, 4023.7142, 4002.0760, 4005.8457] +25-08-28 14:06:59 | D | - best error = [ 4041.2857, 4023.7142, 4002.0760, 4002.0760] +25-08-28 14:06:59 | D | + Adding low-rank branches to transformer_blocks.10.ff_context.net.2.linear +25-08-28 14:07:17 | D | - Calibrating low-rank branches of block transformer_blocks.11 +25-08-28 14:07:17 | D | - Calibrating low-rank branch for transformer_blocks.11.attn.to_q, transformer_blocks.11.attn.to_k, transformer_blocks.11.attn.to_v +25-08-28 14:07:17 | D | + w: sint4 +25-08-28 14:07:17 | D | + x: sint4 +25-08-28 14:07:17 | D | + y: None +25-08-28 14:07:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:07:17 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:07:17 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:07:18 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:07:35 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-28 14:07:35 | D | - error = [ 4242.8601, 4042.6826, 3978.5688, 3929.4669, 3910.1674, 3880.0191, 3863.7272, 3859.7497, 3865.6446] +25-08-28 14:07:35 | D | - best error = [ 4242.8601, 4042.6826, 3978.5688, 3929.4669, 3910.1674, 3880.0191, 3863.7272, 3859.7497, 3859.7497] +25-08-28 14:07:35 | D | + Adding low-rank branches to transformer_blocks.11.attn.to_q, transformer_blocks.11.attn.to_k, transformer_blocks.11.attn.to_v +25-08-28 14:07:35 | D | - Calibrating low-rank branch for transformer_blocks.11.attn.add_q_proj, transformer_blocks.11.attn.add_k_proj, transformer_blocks.11.attn.add_v_proj +25-08-28 14:07:35 | D | + w: sint4 +25-08-28 14:07:35 | D | + x: sint4 +25-08-28 14:07:35 | D | + y: None +25-08-28 14:07:35 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:07:35 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:07:35 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:07:36 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:07:53 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:07:53 | D | - error = [ 1918.1843, 1892.8203, 1886.3493, 1882.5021, 1865.4065, 1864.7340, 1863.6609, 1853.9662, 1852.3317, 1848.2776] +25-08-28 14:07:53 | D | - best error = [ 1918.1843, 1892.8203, 1886.3493, 1882.5021, 1865.4065, 1864.7340, 1863.6609, 1853.9662, 1852.3317, 1848.2776] +25-08-28 14:07:55 | D | - iter = [ 10] +25-08-28 14:07:55 | D | - error = [ 1854.9125] +25-08-28 14:07:55 | D | - best error = [ 1848.2776] +25-08-28 14:07:55 | D | + Adding low-rank branches to transformer_blocks.11.attn.add_q_proj, transformer_blocks.11.attn.add_k_proj, transformer_blocks.11.attn.add_v_proj +25-08-28 14:07:55 | D | - Calibrating low-rank branch for transformer_blocks.11.attn.to_out.0 +25-08-28 14:07:55 | D | + w: sint4 +25-08-28 14:07:55 | D | + x: sint4 +25-08-28 14:07:55 | D | + y: None +25-08-28 14:07:55 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:07:55 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:07:55 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:07:57 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:08:11 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:08:11 | D | - error = [ 7592.4441, 7527.2545, 7478.4218, 7438.0153, 7410.1449, 7388.2117, 7373.5451, 7354.2997, 7348.9646, 7338.2483] +25-08-28 14:08:11 | D | - best error = [ 7592.4441, 7527.2545, 7478.4218, 7438.0153, 7410.1449, 7388.2117, 7373.5451, 7354.2997, 7348.9646, 7338.2483] +25-08-28 14:08:26 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:08:26 | D | - error = [ 7331.9034, 7328.9983, 7322.3832, 7316.0306, 7311.4086, 7308.2228, 7304.7615, 7299.4578, 7293.8454, 7288.8869] +25-08-28 14:08:26 | D | - best error = [ 7331.9034, 7328.9983, 7322.3832, 7316.0306, 7311.4086, 7308.2228, 7304.7615, 7299.4578, 7293.8454, 7288.8869] +25-08-28 14:08:30 | D | - iter = [ 20, 21, 22] +25-08-28 14:08:30 | D | - error = [ 7288.0137, 7285.2404, 7285.9945] +25-08-28 14:08:30 | D | - best error = [ 7288.0137, 7285.2404, 7285.2404] +25-08-28 14:08:31 | D | + Adding low-rank branches to transformer_blocks.11.attn.to_out.0 +25-08-28 14:08:31 | D | - Calibrating low-rank branch for transformer_blocks.11.attn.to_add_out +25-08-28 14:08:31 | D | + w: sint4 +25-08-28 14:08:31 | D | + x: sint4 +25-08-28 14:08:31 | D | + y: None +25-08-28 14:08:31 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:08:31 | D | + finished parsing calibration arguments, ram usage: 16.6 +25-08-28 14:08:31 | D | + finished resetting calibrator, ram usage: 16.6 +25-08-28 14:08:31 | D | + finished calculating the original outputs, ram usage: 16.7 +25-08-28 14:08:41 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:08:41 | D | - error = [ 1815.5620, 1793.7931, 1781.9786, 1773.6319, 1764.4265, 1759.1936, 1758.3379, 1756.9339, 1752.2368, 1749.1701] +25-08-28 14:08:41 | D | - best error = [ 1815.5620, 1793.7931, 1781.9786, 1773.6319, 1764.4265, 1759.1936, 1758.3379, 1756.9339, 1752.2368, 1749.1701] +25-08-28 14:08:43 | D | - iter = [ 10, 11] +25-08-28 14:08:43 | D | - error = [ 1745.0876, 1748.3203] +25-08-28 14:08:43 | D | - best error = [ 1745.0876, 1745.0876] +25-08-28 14:08:44 | D | + Adding low-rank branches to transformer_blocks.11.attn.to_add_out +25-08-28 14:08:44 | D | - Calibrating low-rank branch for transformer_blocks.11.ff.net.0.proj +25-08-28 14:08:44 | D | + w: sint4 +25-08-28 14:08:44 | D | + x: sint4 +25-08-28 14:08:44 | D | + y: None +25-08-28 14:08:44 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:08:44 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:08:44 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:08:46 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:09:06 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:09:06 | D | - error = [ 6939.9516, 6843.9924, 6777.9212, 6731.3318, 6696.9121, 6670.9322, 6650.8454, 6635.8041, 6624.3949, 6613.8285] +25-08-28 14:09:06 | D | - best error = [ 6939.9516, 6843.9924, 6777.9212, 6731.3318, 6696.9121, 6670.9322, 6650.8454, 6635.8041, 6624.3949, 6613.8285] +25-08-28 14:09:27 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:09:27 | D | - error = [ 6604.7083, 6595.0397, 6589.4206, 6584.1968, 6578.1858, 6575.2563, 6573.3097, 6568.4878, 6564.3979, 6559.8884] +25-08-28 14:09:27 | D | - best error = [ 6604.7083, 6595.0397, 6589.4206, 6584.1968, 6578.1858, 6575.2563, 6573.3097, 6568.4878, 6564.3979, 6559.8884] +25-08-28 14:09:47 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 14:09:47 | D | - error = [ 6559.6799, 6556.3240, 6553.7366, 6553.2706, 6550.9092, 6548.0513, 6546.0034, 6542.9739, 6542.0920, 6540.5651] +25-08-28 14:09:47 | D | - best error = [ 6559.6799, 6556.3240, 6553.7366, 6553.2706, 6550.9092, 6548.0513, 6546.0034, 6542.9739, 6542.0920, 6540.5651] +25-08-28 14:10:08 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-28 14:10:08 | D | - error = [ 6538.7543, 6537.2484, 6537.1220, 6534.3612, 6533.6544, 6533.3495, 6530.9684, 6529.2398, 6527.3431, 6527.0621] +25-08-28 14:10:08 | D | - best error = [ 6538.7543, 6537.2484, 6537.1220, 6534.3612, 6533.6544, 6533.3495, 6530.9684, 6529.2398, 6527.3431, 6527.0621] +25-08-28 14:10:10 | D | - iter = [ 40] +25-08-28 14:10:10 | D | - error = [ 6528.8976] +25-08-28 14:10:10 | D | - best error = [ 6527.0621] +25-08-28 14:10:10 | D | + Adding low-rank branches to transformer_blocks.11.ff.net.0.proj +25-08-28 14:10:10 | D | - Calibrating low-rank branch for transformer_blocks.11.ff.net.2.linear +25-08-28 14:10:10 | D | + w: sint4 +25-08-28 14:10:10 | D | + x: sint4 +25-08-28 14:10:10 | D | + y: None +25-08-28 14:10:10 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:10:10 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:10:10 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:10:15 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:10:45 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:10:45 | D | - error = [15943.3352, 15906.1553, 15876.5991, 15860.6043, 15851.9546, 15839.9273, 15819.9499, 15806.9944, 15801.0921, 15806.2069] +25-08-28 14:10:45 | D | - best error = [15943.3352, 15906.1553, 15876.5991, 15860.6043, 15851.9546, 15839.9273, 15819.9499, 15806.9944, 15801.0921, 15801.0921] +25-08-28 14:10:46 | D | + Adding low-rank branches to transformer_blocks.11.ff.net.2.linear +25-08-28 14:10:46 | D | - Calibrating low-rank branch for transformer_blocks.11.ff_context.net.0.proj +25-08-28 14:10:46 | D | + w: sint4 +25-08-28 14:10:46 | D | + x: sint4 +25-08-28 14:10:46 | D | + y: None +25-08-28 14:10:46 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:10:46 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:10:46 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:10:46 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:10:58 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:10:58 | D | - error = [ 1786.7833, 1740.3360, 1708.1740, 1685.4644, 1669.8439, 1656.1076, 1644.9115, 1633.7331, 1625.2811, 1618.9991] +25-08-28 14:10:58 | D | - best error = [ 1786.7833, 1740.3360, 1708.1740, 1685.4644, 1669.8439, 1656.1076, 1644.9115, 1633.7331, 1625.2811, 1618.9991] +25-08-28 14:11:09 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:11:09 | D | - error = [ 1613.9568, 1609.7788, 1606.6231, 1602.7281, 1599.0839, 1597.3740, 1595.0586, 1593.2195, 1590.5705, 1587.3950] +25-08-28 14:11:09 | D | - best error = [ 1613.9568, 1609.7788, 1606.6231, 1602.7281, 1599.0839, 1597.3740, 1595.0586, 1593.2195, 1590.5705, 1587.3950] +25-08-28 14:11:16 | D | - iter = [ 20, 21, 22, 23, 24, 25] +25-08-28 14:11:16 | D | - error = [ 1585.6576, 1585.2214, 1584.7574, 1580.3217, 1579.5541, 1580.2030] +25-08-28 14:11:16 | D | - best error = [ 1585.6576, 1585.2214, 1584.7574, 1580.3217, 1579.5541, 1579.5541] +25-08-28 14:11:16 | D | + Adding low-rank branches to transformer_blocks.11.ff_context.net.0.proj +25-08-28 14:11:17 | D | - Calibrating low-rank branch for transformer_blocks.11.ff_context.net.2.linear +25-08-28 14:11:17 | D | + w: sint4 +25-08-28 14:11:17 | D | + x: sint4 +25-08-28 14:11:17 | D | + y: None +25-08-28 14:11:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:11:17 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:11:17 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:11:18 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:11:29 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-28 14:11:29 | D | - error = [ 705.6304, 700.9211, 696.5746, 691.6867, 691.3189, 689.9388, 687.5775, 684.1077, 684.1321] +25-08-28 14:11:29 | D | - best error = [ 705.6304, 700.9211, 696.5746, 691.6867, 691.3189, 689.9388, 687.5775, 684.1077, 684.1077] +25-08-28 14:11:29 | D | + Adding low-rank branches to transformer_blocks.11.ff_context.net.2.linear +25-08-28 14:11:47 | D | - Calibrating low-rank branches of block transformer_blocks.12 +25-08-28 14:11:47 | D | - Calibrating low-rank branch for transformer_blocks.12.attn.to_q, transformer_blocks.12.attn.to_k, transformer_blocks.12.attn.to_v +25-08-28 14:11:47 | D | + w: sint4 +25-08-28 14:11:47 | D | + x: sint4 +25-08-28 14:11:47 | D | + y: None +25-08-28 14:11:47 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:11:47 | D | + finished parsing calibration arguments, ram usage: 17.7 +25-08-28 14:11:47 | D | + finished resetting calibrator, ram usage: 17.7 +25-08-28 14:11:49 | D | + finished calculating the original outputs, ram usage: 18.0 +25-08-28 14:12:06 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-28 14:12:06 | D | - error = [ 4943.4448, 4842.4539, 4740.5198, 4699.2661, 4686.4174, 4685.3085, 4662.8744, 4662.4650, 4662.5961] +25-08-28 14:12:06 | D | - best error = [ 4943.4448, 4842.4539, 4740.5198, 4699.2661, 4686.4174, 4685.3085, 4662.8744, 4662.4650, 4662.4650] +25-08-28 14:12:06 | D | + Adding low-rank branches to transformer_blocks.12.attn.to_q, transformer_blocks.12.attn.to_k, transformer_blocks.12.attn.to_v +25-08-28 14:12:06 | D | - Calibrating low-rank branch for transformer_blocks.12.attn.add_q_proj, transformer_blocks.12.attn.add_k_proj, transformer_blocks.12.attn.add_v_proj +25-08-28 14:12:06 | D | + w: sint4 +25-08-28 14:12:06 | D | + x: sint4 +25-08-28 14:12:06 | D | + y: None +25-08-28 14:12:06 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:12:06 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:12:06 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:12:07 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:12:23 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-28 14:12:23 | D | - error = [ 1655.6545, 1618.6076, 1613.4300, 1599.0353, 1595.3091, 1593.1590, 1587.1332, 1582.6499, 1585.8486] +25-08-28 14:12:23 | D | - best error = [ 1655.6545, 1618.6076, 1613.4300, 1599.0353, 1595.3091, 1593.1590, 1587.1332, 1582.6499, 1582.6499] +25-08-28 14:12:23 | D | + Adding low-rank branches to transformer_blocks.12.attn.add_q_proj, transformer_blocks.12.attn.add_k_proj, transformer_blocks.12.attn.add_v_proj +25-08-28 14:12:23 | D | - Calibrating low-rank branch for transformer_blocks.12.attn.to_out.0 +25-08-28 14:12:23 | D | + w: sint4 +25-08-28 14:12:23 | D | + x: sint4 +25-08-28 14:12:23 | D | + y: None +25-08-28 14:12:23 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:12:23 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:12:23 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:12:25 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:12:40 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:12:40 | D | - error = [ 8280.0681, 8199.1804, 8148.0150, 8102.7129, 8077.6130, 8057.8382, 8035.8337, 8020.4471, 8008.9495, 7998.1513] +25-08-28 14:12:40 | D | - best error = [ 8280.0681, 8199.1804, 8148.0150, 8102.7129, 8077.6130, 8057.8382, 8035.8337, 8020.4471, 8008.9495, 7998.1513] +25-08-28 14:12:55 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:12:55 | D | - error = [ 7992.8085, 7986.0561, 7979.0087, 7968.2902, 7965.4213, 7961.5418, 7957.7067, 7956.7893, 7955.8328, 7952.9693] +25-08-28 14:12:55 | D | - best error = [ 7992.8085, 7986.0561, 7979.0087, 7968.2902, 7965.4213, 7961.5418, 7957.7067, 7956.7893, 7955.8328, 7952.9693] +25-08-28 14:12:58 | D | - iter = [ 20, 21] +25-08-28 14:12:58 | D | - error = [ 7945.2731, 7946.1369] +25-08-28 14:12:58 | D | - best error = [ 7945.2731, 7945.2731] +25-08-28 14:12:58 | D | + Adding low-rank branches to transformer_blocks.12.attn.to_out.0 +25-08-28 14:12:58 | D | - Calibrating low-rank branch for transformer_blocks.12.attn.to_add_out +25-08-28 14:12:58 | D | + w: sint4 +25-08-28 14:12:58 | D | + x: sint4 +25-08-28 14:12:58 | D | + y: None +25-08-28 14:12:58 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:12:58 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:12:58 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:12:59 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:13:07 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-28 14:13:07 | D | - error = [ 1953.6425, 1938.6981, 1920.9969, 1909.4490, 1906.7146, 1906.6314, 1895.1408, 1897.1730] +25-08-28 14:13:07 | D | - best error = [ 1953.6425, 1938.6981, 1920.9969, 1909.4490, 1906.7146, 1906.6314, 1895.1408, 1895.1408] +25-08-28 14:13:07 | D | + Adding low-rank branches to transformer_blocks.12.attn.to_add_out +25-08-28 14:13:07 | D | - Calibrating low-rank branch for transformer_blocks.12.ff.net.0.proj +25-08-28 14:13:07 | D | + w: sint4 +25-08-28 14:13:07 | D | + x: sint4 +25-08-28 14:13:07 | D | + y: None +25-08-28 14:13:07 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:13:07 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:13:07 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:13:09 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:13:30 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:13:30 | D | - error = [ 7026.2109, 6940.1444, 6878.9563, 6834.4296, 6795.3802, 6769.2475, 6752.0800, 6736.1072, 6726.5974, 6714.8409] +25-08-28 14:13:30 | D | - best error = [ 7026.2109, 6940.1444, 6878.9563, 6834.4296, 6795.3802, 6769.2475, 6752.0800, 6736.1072, 6726.5974, 6714.8409] +25-08-28 14:13:50 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:13:50 | D | - error = [ 6703.1053, 6695.3960, 6689.6738, 6685.0675, 6679.6910, 6676.1195, 6672.3489, 6671.4136, 6666.4242, 6662.9585] +25-08-28 14:13:50 | D | - best error = [ 6703.1053, 6695.3960, 6689.6738, 6685.0675, 6679.6910, 6676.1195, 6672.3489, 6671.4136, 6666.4242, 6662.9585] +25-08-28 14:14:11 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 14:14:11 | D | - error = [ 6660.4612, 6657.8318, 6657.2992, 6653.9528, 6652.1200, 6650.0547, 6647.1082, 6645.6991, 6645.0360, 6644.2408] +25-08-28 14:14:11 | D | - best error = [ 6660.4612, 6657.8318, 6657.2992, 6653.9528, 6652.1200, 6650.0547, 6647.1082, 6645.6991, 6645.0360, 6644.2408] +25-08-28 14:14:25 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36] +25-08-28 14:14:25 | D | - error = [ 6642.8271, 6641.4857, 6639.8904, 6638.7454, 6637.1858, 6635.7360, 6635.9476] +25-08-28 14:14:25 | D | - best error = [ 6642.8271, 6641.4857, 6639.8904, 6638.7454, 6637.1858, 6635.7360, 6635.7360] +25-08-28 14:14:25 | D | + Adding low-rank branches to transformer_blocks.12.ff.net.0.proj +25-08-28 14:14:26 | D | - Calibrating low-rank branch for transformer_blocks.12.ff.net.2.linear +25-08-28 14:14:26 | D | + w: sint4 +25-08-28 14:14:26 | D | + x: sint4 +25-08-28 14:14:26 | D | + y: None +25-08-28 14:14:26 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:14:26 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:14:26 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:14:30 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:14:41 | D | - iter = [ 0, 1, 2, 3] +25-08-28 14:14:41 | D | - error = [14466.9494, 14419.3054, 14393.0767, 14394.2722] +25-08-28 14:14:41 | D | - best error = [14466.9494, 14419.3054, 14393.0767, 14393.0767] +25-08-28 14:14:41 | D | + Adding low-rank branches to transformer_blocks.12.ff.net.2.linear +25-08-28 14:14:42 | D | - Calibrating low-rank branch for transformer_blocks.12.ff_context.net.0.proj +25-08-28 14:14:42 | D | + w: sint4 +25-08-28 14:14:42 | D | + x: sint4 +25-08-28 14:14:42 | D | + y: None +25-08-28 14:14:42 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:14:42 | D | + finished parsing calibration arguments, ram usage: 16.4 +25-08-28 14:14:42 | D | + finished resetting calibrator, ram usage: 16.4 +25-08-28 14:14:42 | D | + finished calculating the original outputs, ram usage: 16.5 +25-08-28 14:14:54 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:14:54 | D | - error = [ 2534.3653, 2523.2408, 2519.9593, 2512.5364, 2510.3904, 2505.9610, 2504.9060, 2502.1958, 2500.2241, 2499.0699] +25-08-28 14:14:54 | D | - best error = [ 2534.3653, 2523.2408, 2519.9593, 2512.5364, 2510.3904, 2505.9610, 2504.9060, 2502.1958, 2500.2241, 2499.0699] +25-08-28 14:14:58 | D | - iter = [ 10, 11, 12, 13] +25-08-28 14:14:58 | D | - error = [ 2498.3840, 2495.9616, 2493.6872, 2495.8686] +25-08-28 14:14:58 | D | - best error = [ 2498.3840, 2495.9616, 2493.6872, 2493.6872] +25-08-28 14:14:58 | D | + Adding low-rank branches to transformer_blocks.12.ff_context.net.0.proj +25-08-28 14:14:59 | D | - Calibrating low-rank branch for transformer_blocks.12.ff_context.net.2.linear +25-08-28 14:14:59 | D | + w: sint4 +25-08-28 14:14:59 | D | + x: sint4 +25-08-28 14:14:59 | D | + y: None +25-08-28 14:14:59 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:14:59 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:14:59 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:15:00 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:15:12 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:15:12 | D | - error = [ 2530.6983, 2408.6235, 2312.5094, 2254.3553, 2194.0868, 2152.3827, 2112.6075, 2104.9634, 2075.9966, 2059.6242] +25-08-28 14:15:12 | D | - best error = [ 2530.6983, 2408.6235, 2312.5094, 2254.3553, 2194.0868, 2152.3827, 2112.6075, 2104.9634, 2075.9966, 2059.6242] +25-08-28 14:15:15 | D | - iter = [ 10, 11] +25-08-28 14:15:15 | D | - error = [ 2039.2892, 2041.4138] +25-08-28 14:15:15 | D | - best error = [ 2039.2892, 2039.2892] +25-08-28 14:15:15 | D | + Adding low-rank branches to transformer_blocks.12.ff_context.net.2.linear +25-08-28 14:15:33 | D | - Calibrating low-rank branches of block transformer_blocks.13 +25-08-28 14:15:33 | D | - Calibrating low-rank branch for transformer_blocks.13.attn.to_q, transformer_blocks.13.attn.to_k, transformer_blocks.13.attn.to_v +25-08-28 14:15:33 | D | + w: sint4 +25-08-28 14:15:33 | D | + x: sint4 +25-08-28 14:15:33 | D | + y: None +25-08-28 14:15:33 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:15:33 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:15:33 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:15:34 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:15:53 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:15:53 | D | - error = [ 4944.5932, 4830.2445, 4764.7220, 4742.2130, 4703.7946, 4691.6215, 4665.1718, 4650.6843, 4647.6913, 4649.3604] +25-08-28 14:15:53 | D | - best error = [ 4944.5932, 4830.2445, 4764.7220, 4742.2130, 4703.7946, 4691.6215, 4665.1718, 4650.6843, 4647.6913, 4647.6913] +25-08-28 14:15:53 | D | + Adding low-rank branches to transformer_blocks.13.attn.to_q, transformer_blocks.13.attn.to_k, transformer_blocks.13.attn.to_v +25-08-28 14:15:53 | D | - Calibrating low-rank branch for transformer_blocks.13.attn.add_q_proj, transformer_blocks.13.attn.add_k_proj, transformer_blocks.13.attn.add_v_proj +25-08-28 14:15:53 | D | + w: sint4 +25-08-28 14:15:53 | D | + x: sint4 +25-08-28 14:15:53 | D | + y: None +25-08-28 14:15:53 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:15:53 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:15:53 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:15:54 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:16:04 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-28 14:16:04 | D | - error = [ 1103.9887, 1093.7729, 1084.9165, 1074.6822, 1071.5696, 1073.4959] +25-08-28 14:16:04 | D | - best error = [ 1103.9887, 1093.7729, 1084.9165, 1074.6822, 1071.5696, 1071.5696] +25-08-28 14:16:05 | D | + Adding low-rank branches to transformer_blocks.13.attn.add_q_proj, transformer_blocks.13.attn.add_k_proj, transformer_blocks.13.attn.add_v_proj +25-08-28 14:16:05 | D | - Calibrating low-rank branch for transformer_blocks.13.attn.to_out.0 +25-08-28 14:16:05 | D | + w: sint4 +25-08-28 14:16:05 | D | + x: sint4 +25-08-28 14:16:05 | D | + y: None +25-08-28 14:16:05 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:16:05 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:16:05 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:16:06 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:16:20 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:16:20 | D | - error = [ 6525.2062, 6463.1599, 6414.2008, 6389.8147, 6365.5982, 6353.8681, 6342.4103, 6330.4123, 6319.1300, 6307.4328] +25-08-28 14:16:20 | D | - best error = [ 6525.2062, 6463.1599, 6414.2008, 6389.8147, 6365.5982, 6353.8681, 6342.4103, 6330.4123, 6319.1300, 6307.4328] +25-08-28 14:16:35 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:16:35 | D | - error = [ 6302.7407, 6296.6756, 6294.3670, 6287.6501, 6281.2368, 6277.1309, 6274.2980, 6272.4821, 6271.2962, 6269.9254] +25-08-28 14:16:35 | D | - best error = [ 6302.7407, 6296.6756, 6294.3670, 6287.6501, 6281.2368, 6277.1309, 6274.2980, 6272.4821, 6271.2962, 6269.9254] +25-08-28 14:16:48 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28] +25-08-28 14:16:48 | D | - error = [ 6267.5319, 6265.2291, 6263.3604, 6262.9932, 6261.4679, 6257.3362, 6252.8701, 6252.2994, 6254.1282] +25-08-28 14:16:48 | D | - best error = [ 6267.5319, 6265.2291, 6263.3604, 6262.9932, 6261.4679, 6257.3362, 6252.8701, 6252.2994, 6252.2994] +25-08-28 14:16:49 | D | + Adding low-rank branches to transformer_blocks.13.attn.to_out.0 +25-08-28 14:16:49 | D | - Calibrating low-rank branch for transformer_blocks.13.attn.to_add_out +25-08-28 14:16:49 | D | + w: sint4 +25-08-28 14:16:49 | D | + x: sint4 +25-08-28 14:16:49 | D | + y: None +25-08-28 14:16:49 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:16:49 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:16:49 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:16:49 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:16:59 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:16:59 | D | - error = [ 1330.6637, 1310.4585, 1302.1744, 1295.8986, 1291.8622, 1287.2646, 1284.0147, 1281.7695, 1278.9330, 1279.2522] +25-08-28 14:16:59 | D | - best error = [ 1330.6637, 1310.4585, 1302.1744, 1295.8986, 1291.8622, 1287.2646, 1284.0147, 1281.7695, 1278.9330, 1278.9330] +25-08-28 14:17:00 | D | + Adding low-rank branches to transformer_blocks.13.attn.to_add_out +25-08-28 14:17:00 | D | - Calibrating low-rank branch for transformer_blocks.13.ff.net.0.proj +25-08-28 14:17:00 | D | + w: sint4 +25-08-28 14:17:00 | D | + x: sint4 +25-08-28 14:17:00 | D | + y: None +25-08-28 14:17:00 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:17:00 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:17:00 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:17:02 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:17:22 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:17:22 | D | - error = [ 6927.2650, 6840.8222, 6780.0189, 6735.1689, 6705.0575, 6680.9550, 6662.9951, 6649.2261, 6635.6992, 6625.2934] +25-08-28 14:17:22 | D | - best error = [ 6927.2650, 6840.8222, 6780.0189, 6735.1689, 6705.0575, 6680.9550, 6662.9951, 6649.2261, 6635.6992, 6625.2934] +25-08-28 14:17:43 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:17:43 | D | - error = [ 6616.4649, 6609.6390, 6603.8755, 6597.4074, 6592.5801, 6589.2819, 6583.5996, 6577.7796, 6575.4300, 6573.1788] +25-08-28 14:17:43 | D | - best error = [ 6616.4649, 6609.6390, 6603.8755, 6597.4074, 6592.5801, 6589.2819, 6583.5996, 6577.7796, 6575.4300, 6573.1788] +25-08-28 14:18:03 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 14:18:03 | D | - error = [ 6569.0879, 6566.4742, 6565.4703, 6563.8083, 6562.1314, 6560.7341, 6559.1801, 6558.3406, 6557.7130, 6555.6678] +25-08-28 14:18:03 | D | - best error = [ 6569.0879, 6566.4742, 6565.4703, 6563.8083, 6562.1314, 6560.7341, 6559.1801, 6558.3406, 6557.7130, 6555.6678] +25-08-28 14:18:24 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-28 14:18:24 | D | - error = [ 6553.9237, 6553.3134, 6552.4432, 6551.9148, 6551.4957, 6550.8222, 6549.1801, 6548.8527, 6547.4587, 6546.5240] +25-08-28 14:18:24 | D | - best error = [ 6553.9237, 6553.3134, 6552.4432, 6551.9148, 6551.4957, 6550.8222, 6549.1801, 6548.8527, 6547.4587, 6546.5240] +25-08-28 14:18:45 | D | - iter = [ 40, 41, 42, 43, 44, 45, 46, 47, 48, 49] +25-08-28 14:18:45 | D | - error = [ 6545.1885, 6543.7906, 6541.5680, 6541.3017, 6540.6287, 6540.1012, 6538.8109, 6537.9003, 6536.5927, 6536.5367] +25-08-28 14:18:45 | D | - best error = [ 6545.1885, 6543.7906, 6541.5680, 6541.3017, 6540.6287, 6540.1012, 6538.8109, 6537.9003, 6536.5927, 6536.5367] +25-08-28 14:18:53 | D | - iter = [ 50, 51, 52, 53] +25-08-28 14:18:53 | D | - error = [ 6535.7999, 6535.7797, 6535.4437, 6535.5428] +25-08-28 14:18:53 | D | - best error = [ 6535.7999, 6535.7797, 6535.4437, 6535.4437] +25-08-28 14:18:53 | D | + Adding low-rank branches to transformer_blocks.13.ff.net.0.proj +25-08-28 14:18:53 | D | - Calibrating low-rank branch for transformer_blocks.13.ff.net.2.linear +25-08-28 14:18:53 | D | + w: sint4 +25-08-28 14:18:53 | D | + x: sint4 +25-08-28 14:18:53 | D | + y: None +25-08-28 14:18:53 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:18:53 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:18:53 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:18:58 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:19:29 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:19:29 | D | - error = [15670.8494, 15635.5922, 15619.5385, 15605.8625, 15589.4877, 15574.7699, 15561.7824, 15545.8239, 15538.0035, 15533.0023] +25-08-28 14:19:29 | D | - best error = [15670.8494, 15635.5922, 15619.5385, 15605.8625, 15589.4877, 15574.7699, 15561.7824, 15545.8239, 15538.0035, 15533.0023] +25-08-28 14:19:32 | D | - iter = [ 10] +25-08-28 14:19:32 | D | - error = [15536.6699] +25-08-28 14:19:32 | D | - best error = [15533.0023] +25-08-28 14:19:32 | D | + Adding low-rank branches to transformer_blocks.13.ff.net.2.linear +25-08-28 14:19:32 | D | - Calibrating low-rank branch for transformer_blocks.13.ff_context.net.0.proj +25-08-28 14:19:32 | D | + w: sint4 +25-08-28 14:19:32 | D | + x: sint4 +25-08-28 14:19:32 | D | + y: None +25-08-28 14:19:32 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:19:32 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:19:32 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:19:33 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:19:37 | D | - iter = [ 0, 1, 2, 3] +25-08-28 14:19:37 | D | - error = [ 1655.5698, 1647.0121, 1642.8386, 1644.9074] +25-08-28 14:19:37 | D | - best error = [ 1655.5698, 1647.0121, 1642.8386, 1642.8386] +25-08-28 14:19:38 | D | + Adding low-rank branches to transformer_blocks.13.ff_context.net.0.proj +25-08-28 14:19:38 | D | - Calibrating low-rank branch for transformer_blocks.13.ff_context.net.2.linear +25-08-28 14:19:38 | D | + w: sint4 +25-08-28 14:19:38 | D | + x: sint4 +25-08-28 14:19:38 | D | + y: None +25-08-28 14:19:38 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:19:38 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:19:38 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:19:39 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:19:53 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:19:53 | D | - error = [ 564.5484, 555.3122, 548.9545, 544.1944, 538.3231, 536.7175, 535.6212, 533.5749, 531.9338, 532.1820] +25-08-28 14:19:53 | D | - best error = [ 564.5484, 555.3122, 548.9545, 544.1944, 538.3231, 536.7175, 535.6212, 533.5749, 531.9338, 531.9338] +25-08-28 14:19:53 | D | + Adding low-rank branches to transformer_blocks.13.ff_context.net.2.linear +25-08-28 14:20:11 | D | - Calibrating low-rank branches of block transformer_blocks.14 +25-08-28 14:20:11 | D | - Calibrating low-rank branch for transformer_blocks.14.attn.to_q, transformer_blocks.14.attn.to_k, transformer_blocks.14.attn.to_v +25-08-28 14:20:11 | D | + w: sint4 +25-08-28 14:20:11 | D | + x: sint4 +25-08-28 14:20:11 | D | + y: None +25-08-28 14:20:11 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:20:11 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:20:11 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:20:12 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:20:31 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:20:31 | D | - error = [ 4386.3180, 4259.3683, 4181.7477, 4128.1846, 4105.6695, 4076.0432, 4072.1152, 4060.2702, 4049.8917, 4046.3340] +25-08-28 14:20:31 | D | - best error = [ 4386.3180, 4259.3683, 4181.7477, 4128.1846, 4105.6695, 4076.0432, 4072.1152, 4060.2702, 4049.8917, 4046.3340] +25-08-28 14:20:35 | D | - iter = [ 10, 11] +25-08-28 14:20:35 | D | - error = [ 4017.6843, 4031.0698] +25-08-28 14:20:35 | D | - best error = [ 4017.6843, 4017.6843] +25-08-28 14:20:35 | D | + Adding low-rank branches to transformer_blocks.14.attn.to_q, transformer_blocks.14.attn.to_k, transformer_blocks.14.attn.to_v +25-08-28 14:20:35 | D | - Calibrating low-rank branch for transformer_blocks.14.attn.add_q_proj, transformer_blocks.14.attn.add_k_proj, transformer_blocks.14.attn.add_v_proj +25-08-28 14:20:35 | D | + w: sint4 +25-08-28 14:20:35 | D | + x: sint4 +25-08-28 14:20:35 | D | + y: None +25-08-28 14:20:35 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:20:35 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:20:35 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:20:36 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:20:54 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:20:54 | D | - error = [ 1544.7029, 1518.8560, 1505.4790, 1502.2133, 1484.6177, 1480.6925, 1472.8408, 1472.4071, 1468.5821, 1469.3184] +25-08-28 14:20:54 | D | - best error = [ 1544.7029, 1518.8560, 1505.4790, 1502.2133, 1484.6177, 1480.6925, 1472.8408, 1472.4071, 1468.5821, 1468.5821] +25-08-28 14:20:54 | D | + Adding low-rank branches to transformer_blocks.14.attn.add_q_proj, transformer_blocks.14.attn.add_k_proj, transformer_blocks.14.attn.add_v_proj +25-08-28 14:20:54 | D | - Calibrating low-rank branch for transformer_blocks.14.attn.to_out.0 +25-08-28 14:20:54 | D | + w: sint4 +25-08-28 14:20:54 | D | + x: sint4 +25-08-28 14:20:54 | D | + y: None +25-08-28 14:20:54 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:20:54 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:20:54 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:20:56 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:21:11 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:21:11 | D | - error = [ 9031.7538, 8946.4728, 8874.6700, 8840.3149, 8812.1929, 8788.8070, 8768.8809, 8748.7709, 8727.8098, 8716.7058] +25-08-28 14:21:11 | D | - best error = [ 9031.7538, 8946.4728, 8874.6700, 8840.3149, 8812.1929, 8788.8070, 8768.8809, 8748.7709, 8727.8098, 8716.7058] +25-08-28 14:21:26 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:21:26 | D | - error = [ 8713.9731, 8702.2636, 8696.4402, 8688.6768, 8683.0244, 8678.1699, 8670.8805, 8667.1772, 8664.3186, 8661.1076] +25-08-28 14:21:26 | D | - best error = [ 8713.9731, 8702.2636, 8696.4402, 8688.6768, 8683.0244, 8678.1699, 8670.8805, 8667.1772, 8664.3186, 8661.1076] +25-08-28 14:21:28 | D | - iter = [ 20] +25-08-28 14:21:28 | D | - error = [ 8664.0240] +25-08-28 14:21:28 | D | - best error = [ 8661.1076] +25-08-28 14:21:28 | D | + Adding low-rank branches to transformer_blocks.14.attn.to_out.0 +25-08-28 14:21:28 | D | - Calibrating low-rank branch for transformer_blocks.14.attn.to_add_out +25-08-28 14:21:28 | D | + w: sint4 +25-08-28 14:21:28 | D | + x: sint4 +25-08-28 14:21:28 | D | + y: None +25-08-28 14:21:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:21:28 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:21:28 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:21:29 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:21:39 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:21:39 | D | - error = [ 1735.2829, 1716.9580, 1704.9647, 1699.8451, 1694.1296, 1685.5032, 1680.9912, 1678.2341, 1675.7118, 1671.2642] +25-08-28 14:21:39 | D | - best error = [ 1735.2829, 1716.9580, 1704.9647, 1699.8451, 1694.1296, 1685.5032, 1680.9912, 1678.2341, 1675.7118, 1671.2642] +25-08-28 14:21:43 | D | - iter = [ 10, 11, 12, 13] +25-08-28 14:21:43 | D | - error = [ 1667.4511, 1664.6315, 1663.2150, 1663.3698] +25-08-28 14:21:43 | D | - best error = [ 1667.4511, 1664.6315, 1663.2150, 1663.2150] +25-08-28 14:21:43 | D | + Adding low-rank branches to transformer_blocks.14.attn.to_add_out +25-08-28 14:21:44 | D | - Calibrating low-rank branch for transformer_blocks.14.ff.net.0.proj +25-08-28 14:21:44 | D | + w: sint4 +25-08-28 14:21:44 | D | + x: sint4 +25-08-28 14:21:44 | D | + y: None +25-08-28 14:21:44 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:21:44 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:21:44 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:21:45 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:22:06 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:22:06 | D | - error = [ 6729.5206, 6643.9594, 6588.6897, 6551.7619, 6519.4868, 6496.0887, 6477.5705, 6463.6751, 6453.2429, 6441.7085] +25-08-28 14:22:06 | D | - best error = [ 6729.5206, 6643.9594, 6588.6897, 6551.7619, 6519.4868, 6496.0887, 6477.5705, 6463.6751, 6453.2429, 6441.7085] +25-08-28 14:22:27 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:22:27 | D | - error = [ 6433.1594, 6425.2738, 6419.5146, 6414.3277, 6409.1800, 6406.1439, 6402.1331, 6400.0544, 6396.6301, 6394.1974] +25-08-28 14:22:27 | D | - best error = [ 6433.1594, 6425.2738, 6419.5146, 6414.3277, 6409.1800, 6406.1439, 6402.1331, 6400.0544, 6396.6301, 6394.1974] +25-08-28 14:22:48 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 14:22:48 | D | - error = [ 6392.4462, 6390.0861, 6387.2914, 6383.4279, 6383.1986, 6381.5760, 6381.0425, 6379.3768, 6376.7901, 6376.0213] +25-08-28 14:22:48 | D | - best error = [ 6392.4462, 6390.0861, 6387.2914, 6383.4279, 6383.1986, 6381.5760, 6381.0425, 6379.3768, 6376.7901, 6376.0213] +25-08-28 14:23:08 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-28 14:23:08 | D | - error = [ 6374.0612, 6373.7308, 6371.8362, 6370.7067, 6368.2443, 6367.8021, 6365.8143, 6365.4470, 6364.6637, 6364.3843] +25-08-28 14:23:08 | D | - best error = [ 6374.0612, 6373.7308, 6371.8362, 6370.7067, 6368.2443, 6367.8021, 6365.8143, 6365.4470, 6364.6637, 6364.3843] +25-08-28 14:23:14 | D | - iter = [ 40, 41, 42] +25-08-28 14:23:14 | D | - error = [ 6364.1716, 6362.2273, 6362.2433] +25-08-28 14:23:14 | D | - best error = [ 6364.1716, 6362.2273, 6362.2273] +25-08-28 14:23:15 | D | + Adding low-rank branches to transformer_blocks.14.ff.net.0.proj +25-08-28 14:23:15 | D | - Calibrating low-rank branch for transformer_blocks.14.ff.net.2.linear +25-08-28 14:23:15 | D | + w: sint4 +25-08-28 14:23:15 | D | + x: sint4 +25-08-28 14:23:15 | D | + y: None +25-08-28 14:23:15 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:23:15 | D | + finished parsing calibration arguments, ram usage: 18.0 +25-08-28 14:23:15 | D | + finished resetting calibrator, ram usage: 18.0 +25-08-28 14:23:19 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:23:40 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 14:23:40 | D | - error = [14134.6145, 14096.7383, 14070.7083, 14052.1509, 14027.6738, 14014.1460, 14018.9597] +25-08-28 14:23:40 | D | - best error = [14134.6145, 14096.7383, 14070.7083, 14052.1509, 14027.6738, 14014.1460, 14014.1460] +25-08-28 14:23:41 | D | + Adding low-rank branches to transformer_blocks.14.ff.net.2.linear +25-08-28 14:23:41 | D | - Calibrating low-rank branch for transformer_blocks.14.ff_context.net.0.proj +25-08-28 14:23:41 | D | + w: sint4 +25-08-28 14:23:41 | D | + x: sint4 +25-08-28 14:23:41 | D | + y: None +25-08-28 14:23:41 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:23:41 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:23:41 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:23:41 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:23:51 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-28 14:23:51 | D | - error = [ 2680.6701, 2663.0406, 2658.7497, 2649.1068, 2647.1307, 2637.9295, 2631.6228, 2632.1159] +25-08-28 14:23:51 | D | - best error = [ 2680.6701, 2663.0406, 2658.7497, 2649.1068, 2647.1307, 2637.9295, 2631.6228, 2631.6228] +25-08-28 14:23:51 | D | + Adding low-rank branches to transformer_blocks.14.ff_context.net.0.proj +25-08-28 14:23:51 | D | - Calibrating low-rank branch for transformer_blocks.14.ff_context.net.2.linear +25-08-28 14:23:51 | D | + w: sint4 +25-08-28 14:23:51 | D | + x: sint4 +25-08-28 14:23:51 | D | + y: None +25-08-28 14:23:51 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:23:51 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:23:51 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:23:52 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:24:02 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-28 14:24:02 | D | - error = [ 3570.6795, 3436.2532, 3361.5379, 3295.5403, 3241.2933, 3232.2925, 3192.8779, 3212.3071] +25-08-28 14:24:02 | D | - best error = [ 3570.6795, 3436.2532, 3361.5379, 3295.5403, 3241.2933, 3232.2925, 3192.8779, 3192.8779] +25-08-28 14:24:02 | D | + Adding low-rank branches to transformer_blocks.14.ff_context.net.2.linear +25-08-28 14:24:21 | D | - Calibrating low-rank branches of block transformer_blocks.15 +25-08-28 14:24:21 | D | - Calibrating low-rank branch for transformer_blocks.15.attn.to_q, transformer_blocks.15.attn.to_k, transformer_blocks.15.attn.to_v +25-08-28 14:24:21 | D | + w: sint4 +25-08-28 14:24:21 | D | + x: sint4 +25-08-28 14:24:21 | D | + y: None +25-08-28 14:24:21 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:24:21 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:24:21 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:24:22 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:24:39 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-28 14:24:39 | D | - error = [ 4410.1651, 4247.9306, 4110.0467, 4044.1412, 3998.8465, 3965.0912, 3929.5870, 3908.9078, 3911.5485] +25-08-28 14:24:39 | D | - best error = [ 4410.1651, 4247.9306, 4110.0467, 4044.1412, 3998.8465, 3965.0912, 3929.5870, 3908.9078, 3908.9078] +25-08-28 14:24:39 | D | + Adding low-rank branches to transformer_blocks.15.attn.to_q, transformer_blocks.15.attn.to_k, transformer_blocks.15.attn.to_v +25-08-28 14:24:39 | D | - Calibrating low-rank branch for transformer_blocks.15.attn.add_q_proj, transformer_blocks.15.attn.add_k_proj, transformer_blocks.15.attn.add_v_proj +25-08-28 14:24:39 | D | + w: sint4 +25-08-28 14:24:39 | D | + x: sint4 +25-08-28 14:24:39 | D | + y: None +25-08-28 14:24:39 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:24:39 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:24:39 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:24:40 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:24:47 | D | - iter = [ 0, 1, 2, 3] +25-08-28 14:24:47 | D | - error = [ 1539.7297, 1512.3620, 1511.5793, 1513.3367] +25-08-28 14:24:47 | D | - best error = [ 1539.7297, 1512.3620, 1511.5793, 1511.5793] +25-08-28 14:24:47 | D | + Adding low-rank branches to transformer_blocks.15.attn.add_q_proj, transformer_blocks.15.attn.add_k_proj, transformer_blocks.15.attn.add_v_proj +25-08-28 14:24:47 | D | - Calibrating low-rank branch for transformer_blocks.15.attn.to_out.0 +25-08-28 14:24:47 | D | + w: sint4 +25-08-28 14:24:47 | D | + x: sint4 +25-08-28 14:24:47 | D | + y: None +25-08-28 14:24:47 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:24:47 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:24:47 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:24:49 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:25:03 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:25:03 | D | - error = [ 9858.7995, 9777.4262, 9714.6302, 9667.4485, 9628.5687, 9603.7443, 9583.4441, 9566.4556, 9551.3459, 9542.6110] +25-08-28 14:25:03 | D | - best error = [ 9858.7995, 9777.4262, 9714.6302, 9667.4485, 9628.5687, 9603.7443, 9583.4441, 9566.4556, 9551.3459, 9542.6110] +25-08-28 14:25:18 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:25:18 | D | - error = [ 9529.2728, 9522.5450, 9511.7098, 9503.8110, 9498.9726, 9496.0282, 9492.8567, 9488.8684, 9482.9730, 9479.9321] +25-08-28 14:25:18 | D | - best error = [ 9529.2728, 9522.5450, 9511.7098, 9503.8110, 9498.9726, 9496.0282, 9492.8567, 9488.8684, 9482.9730, 9479.9321] +25-08-28 14:25:33 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 14:25:33 | D | - error = [ 9477.9314, 9473.0612, 9471.6317, 9467.3777, 9465.2742, 9460.3467, 9458.1165, 9454.2854, 9452.4460, 9449.2857] +25-08-28 14:25:33 | D | - best error = [ 9477.9314, 9473.0612, 9471.6317, 9467.3777, 9465.2742, 9460.3467, 9458.1165, 9454.2854, 9452.4460, 9449.2857] +25-08-28 14:25:39 | D | - iter = [ 30, 31, 32, 33] +25-08-28 14:25:39 | D | - error = [ 9446.4948, 9444.0237, 9440.9919, 9442.6585] +25-08-28 14:25:39 | D | - best error = [ 9446.4948, 9444.0237, 9440.9919, 9440.9919] +25-08-28 14:25:39 | D | + Adding low-rank branches to transformer_blocks.15.attn.to_out.0 +25-08-28 14:25:39 | D | - Calibrating low-rank branch for transformer_blocks.15.attn.to_add_out +25-08-28 14:25:39 | D | + w: sint4 +25-08-28 14:25:39 | D | + x: sint4 +25-08-28 14:25:39 | D | + y: None +25-08-28 14:25:39 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:25:39 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:25:39 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:25:40 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:25:48 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-28 14:25:48 | D | - error = [ 1557.3554, 1538.3149, 1516.1110, 1502.4469, 1492.7729, 1492.7050, 1485.1752, 1485.2524] +25-08-28 14:25:48 | D | - best error = [ 1557.3554, 1538.3149, 1516.1110, 1502.4469, 1492.7729, 1492.7050, 1485.1752, 1485.1752] +25-08-28 14:25:48 | D | + Adding low-rank branches to transformer_blocks.15.attn.to_add_out +25-08-28 14:25:48 | D | - Calibrating low-rank branch for transformer_blocks.15.ff.net.0.proj +25-08-28 14:25:48 | D | + w: sint4 +25-08-28 14:25:48 | D | + x: sint4 +25-08-28 14:25:48 | D | + y: None +25-08-28 14:25:48 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:25:48 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:25:48 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:25:50 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:26:11 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:26:11 | D | - error = [ 6784.8192, 6702.4422, 6642.8669, 6597.8924, 6562.5358, 6537.8682, 6517.1880, 6502.3682, 6487.9402, 6477.5307] +25-08-28 14:26:11 | D | - best error = [ 6784.8192, 6702.4422, 6642.8669, 6597.8924, 6562.5358, 6537.8682, 6517.1880, 6502.3682, 6487.9402, 6477.5307] +25-08-28 14:26:31 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:26:31 | D | - error = [ 6468.7204, 6463.1630, 6455.6413, 6447.7816, 6444.2852, 6438.6051, 6433.8259, 6430.0454, 6427.2576, 6424.0221] +25-08-28 14:26:31 | D | - best error = [ 6468.7204, 6463.1630, 6455.6413, 6447.7816, 6444.2852, 6438.6051, 6433.8259, 6430.0454, 6427.2576, 6424.0221] +25-08-28 14:26:52 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 14:26:52 | D | - error = [ 6422.4372, 6419.4283, 6418.4101, 6416.6727, 6414.7539, 6413.0586, 6411.6634, 6409.4118, 6409.1207, 6407.1601] +25-08-28 14:26:52 | D | - best error = [ 6422.4372, 6419.4283, 6418.4101, 6416.6727, 6414.7539, 6413.0586, 6411.6634, 6409.4118, 6409.1207, 6407.1601] +25-08-28 14:27:13 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-28 14:27:13 | D | - error = [ 6404.9337, 6403.3166, 6403.1670, 6401.9966, 6400.2594, 6399.8147, 6398.2757, 6397.0987, 6396.9124, 6396.8286] +25-08-28 14:27:13 | D | - best error = [ 6404.9337, 6403.3166, 6403.1670, 6401.9966, 6400.2594, 6399.8147, 6398.2757, 6397.0987, 6396.9124, 6396.8286] +25-08-28 14:27:19 | D | - iter = [ 40, 41, 42] +25-08-28 14:27:19 | D | - error = [ 6396.5422, 6395.2026, 6395.8705] +25-08-28 14:27:19 | D | - best error = [ 6396.5422, 6395.2026, 6395.2026] +25-08-28 14:27:19 | D | + Adding low-rank branches to transformer_blocks.15.ff.net.0.proj +25-08-28 14:27:20 | D | - Calibrating low-rank branch for transformer_blocks.15.ff.net.2.linear +25-08-28 14:27:20 | D | + w: sint4 +25-08-28 14:27:20 | D | + x: sint4 +25-08-28 14:27:20 | D | + y: None +25-08-28 14:27:20 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:27:20 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:27:20 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:27:24 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:27:45 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 14:27:45 | D | - error = [13362.9272, 13334.1564, 13316.5912, 13311.3017, 13291.1923, 13286.4442, 13287.9368] +25-08-28 14:27:45 | D | - best error = [13362.9272, 13334.1564, 13316.5912, 13311.3017, 13291.1923, 13286.4442, 13286.4442] +25-08-28 14:27:46 | D | + Adding low-rank branches to transformer_blocks.15.ff.net.2.linear +25-08-28 14:27:46 | D | - Calibrating low-rank branch for transformer_blocks.15.ff_context.net.0.proj +25-08-28 14:27:46 | D | + w: sint4 +25-08-28 14:27:46 | D | + x: sint4 +25-08-28 14:27:46 | D | + y: None +25-08-28 14:27:46 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:27:46 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:27:46 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:27:46 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:27:58 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:27:58 | D | - error = [ 1842.1325, 1810.6421, 1792.0068, 1780.6438, 1769.2700, 1763.2111, 1754.9541, 1749.7754, 1746.1860, 1742.4595] +25-08-28 14:27:58 | D | - best error = [ 1842.1325, 1810.6421, 1792.0068, 1780.6438, 1769.2700, 1763.2111, 1754.9541, 1749.7754, 1746.1860, 1742.4595] +25-08-28 14:28:02 | D | - iter = [ 10, 11, 12, 13] +25-08-28 14:28:02 | D | - error = [ 1739.5671, 1735.4453, 1732.7407, 1733.0550] +25-08-28 14:28:02 | D | - best error = [ 1739.5671, 1735.4453, 1732.7407, 1732.7407] +25-08-28 14:28:02 | D | + Adding low-rank branches to transformer_blocks.15.ff_context.net.0.proj +25-08-28 14:28:03 | D | - Calibrating low-rank branch for transformer_blocks.15.ff_context.net.2.linear +25-08-28 14:28:03 | D | + w: sint4 +25-08-28 14:28:03 | D | + x: sint4 +25-08-28 14:28:03 | D | + y: None +25-08-28 14:28:03 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:28:03 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:28:03 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:28:04 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:28:16 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:28:16 | D | - error = [ 3396.2179, 3375.7854, 3360.2298, 3349.3163, 3334.2969, 3328.8973, 3323.0763, 3321.0964, 3313.2027, 3318.1985] +25-08-28 14:28:16 | D | - best error = [ 3396.2179, 3375.7854, 3360.2298, 3349.3163, 3334.2969, 3328.8973, 3323.0763, 3321.0964, 3313.2027, 3313.2027] +25-08-28 14:28:16 | D | + Adding low-rank branches to transformer_blocks.15.ff_context.net.2.linear +25-08-28 14:28:35 | D | - Calibrating low-rank branches of block transformer_blocks.16 +25-08-28 14:28:35 | D | - Calibrating low-rank branch for transformer_blocks.16.attn.to_q, transformer_blocks.16.attn.to_k, transformer_blocks.16.attn.to_v +25-08-28 14:28:35 | D | + w: sint4 +25-08-28 14:28:35 | D | + x: sint4 +25-08-28 14:28:35 | D | + y: None +25-08-28 14:28:35 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:28:35 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:28:35 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:28:36 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:28:55 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:28:55 | D | - error = [ 4685.7655, 4507.3868, 4381.5116, 4317.2776, 4286.8151, 4263.6714, 4230.6442, 4202.9863, 4180.1254, 4173.7123] +25-08-28 14:28:55 | D | - best error = [ 4685.7655, 4507.3868, 4381.5116, 4317.2776, 4286.8151, 4263.6714, 4230.6442, 4202.9863, 4180.1254, 4173.7123] +25-08-28 14:28:57 | D | - iter = [ 10] +25-08-28 14:28:57 | D | - error = [ 4183.8384] +25-08-28 14:28:57 | D | - best error = [ 4173.7123] +25-08-28 14:28:57 | D | + Adding low-rank branches to transformer_blocks.16.attn.to_q, transformer_blocks.16.attn.to_k, transformer_blocks.16.attn.to_v +25-08-28 14:28:57 | D | - Calibrating low-rank branch for transformer_blocks.16.attn.add_q_proj, transformer_blocks.16.attn.add_k_proj, transformer_blocks.16.attn.add_v_proj +25-08-28 14:28:57 | D | + w: sint4 +25-08-28 14:28:57 | D | + x: sint4 +25-08-28 14:28:57 | D | + y: None +25-08-28 14:28:57 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:28:57 | D | + finished parsing calibration arguments, ram usage: 17.6 +25-08-28 14:28:57 | D | + finished resetting calibrator, ram usage: 17.6 +25-08-28 14:28:58 | D | + finished calculating the original outputs, ram usage: 17.8 +25-08-28 14:29:10 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 14:29:10 | D | - error = [ 1705.0869, 1669.0941, 1655.4129, 1651.3370, 1644.7457, 1642.1757, 1645.6812] +25-08-28 14:29:10 | D | - best error = [ 1705.0869, 1669.0941, 1655.4129, 1651.3370, 1644.7457, 1642.1757, 1642.1757] +25-08-28 14:29:11 | D | + Adding low-rank branches to transformer_blocks.16.attn.add_q_proj, transformer_blocks.16.attn.add_k_proj, transformer_blocks.16.attn.add_v_proj +25-08-28 14:29:11 | D | - Calibrating low-rank branch for transformer_blocks.16.attn.to_out.0 +25-08-28 14:29:11 | D | + w: sint4 +25-08-28 14:29:11 | D | + x: sint4 +25-08-28 14:29:11 | D | + y: None +25-08-28 14:29:11 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:29:11 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:29:11 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:29:12 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:29:27 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:29:27 | D | - error = [ 9590.3969, 9507.4699, 9442.2434, 9390.6430, 9359.3632, 9331.5821, 9315.6720, 9304.2945, 9292.8200, 9276.1095] +25-08-28 14:29:27 | D | - best error = [ 9590.3969, 9507.4699, 9442.2434, 9390.6430, 9359.3632, 9331.5821, 9315.6720, 9304.2945, 9292.8200, 9276.1095] +25-08-28 14:29:42 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:29:42 | D | - error = [ 9271.3097, 9262.3133, 9251.5504, 9237.9630, 9233.2880, 9228.0949, 9221.2776, 9217.2482, 9211.4807, 9205.4831] +25-08-28 14:29:42 | D | - best error = [ 9271.3097, 9262.3133, 9251.5504, 9237.9630, 9233.2880, 9228.0949, 9221.2776, 9217.2482, 9211.4807, 9205.4831] +25-08-28 14:29:45 | D | - iter = [ 20, 21] +25-08-28 14:29:45 | D | - error = [ 9203.5289, 9204.7049] +25-08-28 14:29:45 | D | - best error = [ 9203.5289, 9203.5289] +25-08-28 14:29:46 | D | + Adding low-rank branches to transformer_blocks.16.attn.to_out.0 +25-08-28 14:29:46 | D | - Calibrating low-rank branch for transformer_blocks.16.attn.to_add_out +25-08-28 14:29:46 | D | + w: sint4 +25-08-28 14:29:46 | D | + x: sint4 +25-08-28 14:29:46 | D | + y: None +25-08-28 14:29:46 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:29:46 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:29:46 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:29:46 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:29:56 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:29:56 | D | - error = [ 1712.2402, 1693.4760, 1678.3090, 1671.4825, 1660.9057, 1655.6263, 1650.0543, 1644.8916, 1639.7657, 1639.7604] +25-08-28 14:29:56 | D | - best error = [ 1712.2402, 1693.4760, 1678.3090, 1671.4825, 1660.9057, 1655.6263, 1650.0543, 1644.8916, 1639.7657, 1639.7604] +25-08-28 14:30:00 | D | - iter = [ 10, 11, 12, 13] +25-08-28 14:30:00 | D | - error = [ 1637.1574, 1634.0220, 1631.6895, 1631.8851] +25-08-28 14:30:00 | D | - best error = [ 1637.1574, 1634.0220, 1631.6895, 1631.6895] +25-08-28 14:30:01 | D | + Adding low-rank branches to transformer_blocks.16.attn.to_add_out +25-08-28 14:30:01 | D | - Calibrating low-rank branch for transformer_blocks.16.ff.net.0.proj +25-08-28 14:30:01 | D | + w: sint4 +25-08-28 14:30:01 | D | + x: sint4 +25-08-28 14:30:01 | D | + y: None +25-08-28 14:30:01 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:30:01 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:30:01 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:30:02 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:30:23 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:30:23 | D | - error = [ 6466.9987, 6394.3072, 6343.3281, 6306.7881, 6279.5560, 6258.9753, 6242.8968, 6229.3990, 6220.8289, 6212.3957] +25-08-28 14:30:23 | D | - best error = [ 6466.9987, 6394.3072, 6343.3281, 6306.7881, 6279.5560, 6258.9753, 6242.8968, 6229.3990, 6220.8289, 6212.3957] +25-08-28 14:30:44 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:30:44 | D | - error = [ 6207.6949, 6199.8103, 6193.6364, 6188.8319, 6184.3690, 6181.1368, 6177.1278, 6174.5332, 6171.0502, 6168.3314] +25-08-28 14:30:44 | D | - best error = [ 6207.6949, 6199.8103, 6193.6364, 6188.8319, 6184.3690, 6181.1368, 6177.1278, 6174.5332, 6171.0502, 6168.3314] +25-08-28 14:30:54 | D | - iter = [ 20, 21, 22, 23, 24] +25-08-28 14:30:54 | D | - error = [ 6166.2092, 6163.2826, 6161.9250, 6158.8025, 6159.2792] +25-08-28 14:30:54 | D | - best error = [ 6166.2092, 6163.2826, 6161.9250, 6158.8025, 6158.8025] +25-08-28 14:30:55 | D | + Adding low-rank branches to transformer_blocks.16.ff.net.0.proj +25-08-28 14:30:55 | D | - Calibrating low-rank branch for transformer_blocks.16.ff.net.2.linear +25-08-28 14:30:55 | D | + w: sint4 +25-08-28 14:30:55 | D | + x: sint4 +25-08-28 14:30:55 | D | + y: None +25-08-28 14:30:55 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:30:55 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:30:55 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:30:59 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:31:15 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-28 14:31:15 | D | - error = [14115.0592, 14076.4086, 14047.7802, 14037.4841, 14037.6853] +25-08-28 14:31:15 | D | - best error = [14115.0592, 14076.4086, 14047.7802, 14037.4841, 14037.4841] +25-08-28 14:31:15 | D | + Adding low-rank branches to transformer_blocks.16.ff.net.2.linear +25-08-28 14:31:16 | D | - Calibrating low-rank branch for transformer_blocks.16.ff_context.net.0.proj +25-08-28 14:31:16 | D | + w: sint4 +25-08-28 14:31:16 | D | + x: sint4 +25-08-28 14:31:16 | D | + y: None +25-08-28 14:31:16 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:31:16 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:31:16 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:31:16 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:31:28 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:31:28 | D | - error = [ 2241.0187, 2205.1790, 2181.1597, 2167.4859, 2155.6351, 2142.7223, 2134.2428, 2126.3400, 2121.0075, 2117.7891] +25-08-28 14:31:28 | D | - best error = [ 2241.0187, 2205.1790, 2181.1597, 2167.4859, 2155.6351, 2142.7223, 2134.2428, 2126.3400, 2121.0075, 2117.7891] +25-08-28 14:31:39 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:31:39 | D | - error = [ 2113.8691, 2110.4680, 2107.4097, 2106.2356, 2102.6851, 2102.4839, 2101.3351, 2100.2903, 2097.0397, 2097.0055] +25-08-28 14:31:39 | D | - best error = [ 2113.8691, 2110.4680, 2107.4097, 2106.2356, 2102.6851, 2102.4839, 2101.3351, 2100.2903, 2097.0397, 2097.0055] +25-08-28 14:31:43 | D | - iter = [ 20, 21, 22] +25-08-28 14:31:43 | D | - error = [ 2095.0801, 2093.3285, 2093.4363] +25-08-28 14:31:43 | D | - best error = [ 2095.0801, 2093.3285, 2093.3285] +25-08-28 14:31:43 | D | + Adding low-rank branches to transformer_blocks.16.ff_context.net.0.proj +25-08-28 14:31:43 | D | - Calibrating low-rank branch for transformer_blocks.16.ff_context.net.2.linear +25-08-28 14:31:43 | D | + w: sint4 +25-08-28 14:31:43 | D | + x: sint4 +25-08-28 14:31:43 | D | + y: None +25-08-28 14:31:43 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:31:43 | D | + finished parsing calibration arguments, ram usage: 16.5 +25-08-28 14:31:43 | D | + finished resetting calibrator, ram usage: 16.5 +25-08-28 14:31:44 | D | + finished calculating the original outputs, ram usage: 16.7 +25-08-28 14:31:56 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:31:56 | D | - error = [ 3346.6954, 3315.3104, 3296.1277, 3289.2765, 3280.7539, 3277.2839, 3276.0154, 3271.6934, 3269.2814, 3268.9449] +25-08-28 14:31:56 | D | - best error = [ 3346.6954, 3315.3104, 3296.1277, 3289.2765, 3280.7539, 3277.2839, 3276.0154, 3271.6934, 3269.2814, 3268.9449] +25-08-28 14:31:58 | D | - iter = [ 10] +25-08-28 14:31:58 | D | - error = [ 3270.3926] +25-08-28 14:31:58 | D | - best error = [ 3268.9449] +25-08-28 14:31:58 | D | + Adding low-rank branches to transformer_blocks.16.ff_context.net.2.linear +25-08-28 14:32:16 | D | - Calibrating low-rank branches of block transformer_blocks.17 +25-08-28 14:32:16 | D | - Calibrating low-rank branch for transformer_blocks.17.attn.to_q, transformer_blocks.17.attn.to_k, transformer_blocks.17.attn.to_v +25-08-28 14:32:16 | D | + w: sint4 +25-08-28 14:32:16 | D | + x: sint4 +25-08-28 14:32:16 | D | + y: None +25-08-28 14:32:16 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:32:16 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:32:16 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:32:18 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:32:36 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:32:36 | D | - error = [ 4901.7113, 4714.0734, 4594.3494, 4527.9344, 4492.1200, 4440.8765, 4419.9043, 4376.7931, 4370.0950, 4352.1907] +25-08-28 14:32:36 | D | - best error = [ 4901.7113, 4714.0734, 4594.3494, 4527.9344, 4492.1200, 4440.8765, 4419.9043, 4376.7931, 4370.0950, 4352.1907] +25-08-28 14:32:46 | D | - iter = [ 10, 11, 12, 13, 14] +25-08-28 14:32:46 | D | - error = [ 4343.9328, 4342.2724, 4322.5944, 4313.2865, 4317.0302] +25-08-28 14:32:46 | D | - best error = [ 4343.9328, 4342.2724, 4322.5944, 4313.2865, 4313.2865] +25-08-28 14:32:46 | D | + Adding low-rank branches to transformer_blocks.17.attn.to_q, transformer_blocks.17.attn.to_k, transformer_blocks.17.attn.to_v +25-08-28 14:32:46 | D | - Calibrating low-rank branch for transformer_blocks.17.attn.add_q_proj, transformer_blocks.17.attn.add_k_proj, transformer_blocks.17.attn.add_v_proj +25-08-28 14:32:46 | D | + w: sint4 +25-08-28 14:32:46 | D | + x: sint4 +25-08-28 14:32:46 | D | + y: None +25-08-28 14:32:46 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:32:46 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:32:46 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:32:47 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:32:58 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-28 14:32:58 | D | - error = [ 1878.3909, 1851.3789, 1819.8571, 1810.4236, 1803.1609, 1813.4948] +25-08-28 14:32:58 | D | - best error = [ 1878.3909, 1851.3789, 1819.8571, 1810.4236, 1803.1609, 1803.1609] +25-08-28 14:32:58 | D | + Adding low-rank branches to transformer_blocks.17.attn.add_q_proj, transformer_blocks.17.attn.add_k_proj, transformer_blocks.17.attn.add_v_proj +25-08-28 14:32:58 | D | - Calibrating low-rank branch for transformer_blocks.17.attn.to_out.0 +25-08-28 14:32:58 | D | + w: sint4 +25-08-28 14:32:58 | D | + x: sint4 +25-08-28 14:32:58 | D | + y: None +25-08-28 14:32:58 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:32:58 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:32:58 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:33:00 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:33:15 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:33:15 | D | - error = [ 9180.4623, 9091.2327, 9033.5561, 8997.7486, 8969.2015, 8942.5978, 8919.1941, 8910.6345, 8889.8643, 8877.4208] +25-08-28 14:33:15 | D | - best error = [ 9180.4623, 9091.2327, 9033.5561, 8997.7486, 8969.2015, 8942.5978, 8919.1941, 8910.6345, 8889.8643, 8877.4208] +25-08-28 14:33:30 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:33:30 | D | - error = [ 8865.3013, 8859.9702, 8858.3141, 8851.5730, 8847.1130, 8840.6633, 8840.1321, 8832.6816, 8828.1740, 8824.6500] +25-08-28 14:33:30 | D | - best error = [ 8865.3013, 8859.9702, 8858.3141, 8851.5730, 8847.1130, 8840.6633, 8840.1321, 8832.6816, 8828.1740, 8824.6500] +25-08-28 14:33:45 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 14:33:45 | D | - error = [ 8818.4366, 8816.0708, 8812.2351, 8809.4141, 8804.7115, 8803.8914, 8801.2054, 8800.1841, 8799.8927, 8798.7267] +25-08-28 14:33:45 | D | - best error = [ 8818.4366, 8816.0708, 8812.2351, 8809.4141, 8804.7115, 8803.8914, 8801.2054, 8800.1841, 8799.8927, 8798.7267] +25-08-28 14:33:51 | D | - iter = [ 30, 31, 32, 33] +25-08-28 14:33:51 | D | - error = [ 8796.1701, 8794.5037, 8792.3799, 8794.3455] +25-08-28 14:33:51 | D | - best error = [ 8796.1701, 8794.5037, 8792.3799, 8792.3799] +25-08-28 14:33:52 | D | + Adding low-rank branches to transformer_blocks.17.attn.to_out.0 +25-08-28 14:33:52 | D | - Calibrating low-rank branch for transformer_blocks.17.attn.to_add_out +25-08-28 14:33:52 | D | + w: sint4 +25-08-28 14:33:52 | D | + x: sint4 +25-08-28 14:33:52 | D | + y: None +25-08-28 14:33:52 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:33:52 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:33:52 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:33:52 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:34:02 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:34:02 | D | - error = [ 1652.7695, 1632.0885, 1620.5828, 1614.4837, 1607.3074, 1599.7848, 1595.5898, 1593.4514, 1589.8952, 1589.8831] +25-08-28 14:34:02 | D | - best error = [ 1652.7695, 1632.0885, 1620.5828, 1614.4837, 1607.3074, 1599.7848, 1595.5898, 1593.4514, 1589.8952, 1589.8831] +25-08-28 14:34:08 | D | - iter = [ 10, 11, 12, 13, 14] +25-08-28 14:34:08 | D | - error = [ 1589.3082, 1587.7068, 1583.6116, 1582.9992, 1584.0401] +25-08-28 14:34:08 | D | - best error = [ 1589.3082, 1587.7068, 1583.6116, 1582.9992, 1582.9992] +25-08-28 14:34:08 | D | + Adding low-rank branches to transformer_blocks.17.attn.to_add_out +25-08-28 14:34:08 | D | - Calibrating low-rank branch for transformer_blocks.17.ff.net.0.proj +25-08-28 14:34:08 | D | + w: sint4 +25-08-28 14:34:08 | D | + x: sint4 +25-08-28 14:34:08 | D | + y: None +25-08-28 14:34:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:34:08 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:34:08 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:34:10 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:34:31 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:34:31 | D | - error = [ 4134.3882, 4089.2491, 4059.7759, 4037.5782, 4022.6430, 4010.6264, 4000.8940, 3992.1300, 3985.4650, 3980.2232] +25-08-28 14:34:31 | D | - best error = [ 4134.3882, 4089.2491, 4059.7759, 4037.5782, 4022.6430, 4010.6264, 4000.8940, 3992.1300, 3985.4650, 3980.2232] +25-08-28 14:34:51 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:34:51 | D | - error = [ 3975.7743, 3971.0057, 3967.3071, 3964.9282, 3962.6410, 3960.8300, 3958.4275, 3956.4766, 3954.9164, 3952.9461] +25-08-28 14:34:51 | D | - best error = [ 3975.7743, 3971.0057, 3967.3071, 3964.9282, 3962.6410, 3960.8300, 3958.4275, 3956.4766, 3954.9164, 3952.9461] +25-08-28 14:35:12 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 14:35:12 | D | - error = [ 3951.7194, 3950.3659, 3949.7744, 3948.4251, 3947.4762, 3946.3716, 3946.0289, 3944.7108, 3944.4875, 3943.5743] +25-08-28 14:35:12 | D | - best error = [ 3951.7194, 3950.3659, 3949.7744, 3948.4251, 3947.4762, 3946.3716, 3946.0289, 3944.7108, 3944.4875, 3943.5743] +25-08-28 14:35:18 | D | - iter = [ 30, 31, 32] +25-08-28 14:35:18 | D | - error = [ 3943.0465, 3942.4695, 3942.5740] +25-08-28 14:35:18 | D | - best error = [ 3943.0465, 3942.4695, 3942.4695] +25-08-28 14:35:18 | D | + Adding low-rank branches to transformer_blocks.17.ff.net.0.proj +25-08-28 14:35:18 | D | - Calibrating low-rank branch for transformer_blocks.17.ff.net.2.linear +25-08-28 14:35:18 | D | + w: sint4 +25-08-28 14:35:18 | D | + x: sint4 +25-08-28 14:35:18 | D | + y: None +25-08-28 14:35:18 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:35:18 | D | + finished parsing calibration arguments, ram usage: 18.5 +25-08-28 14:35:18 | D | + finished resetting calibrator, ram usage: 18.5 +25-08-28 14:35:23 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:35:44 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 14:35:44 | D | - error = [12654.7572, 12624.5672, 12593.0266, 12567.2068, 12558.8929, 12552.7450, 12554.7690] +25-08-28 14:35:44 | D | - best error = [12654.7572, 12624.5672, 12593.0266, 12567.2068, 12558.8929, 12552.7450, 12552.7450] +25-08-28 14:35:44 | D | + Adding low-rank branches to transformer_blocks.17.ff.net.2.linear +25-08-28 14:35:44 | D | - Calibrating low-rank branch for transformer_blocks.17.ff_context.net.0.proj +25-08-28 14:35:44 | D | + w: sint4 +25-08-28 14:35:44 | D | + x: sint4 +25-08-28 14:35:44 | D | + y: None +25-08-28 14:35:44 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:35:44 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:35:44 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:35:45 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:35:56 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:35:56 | D | - error = [ 1804.6232, 1768.3911, 1739.4838, 1718.3544, 1701.2039, 1690.7590, 1681.2301, 1672.8139, 1666.6622, 1662.5081] +25-08-28 14:35:56 | D | - best error = [ 1804.6232, 1768.3911, 1739.4838, 1718.3544, 1701.2039, 1690.7590, 1681.2301, 1672.8139, 1666.6622, 1662.5081] +25-08-28 14:36:07 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:36:07 | D | - error = [ 1658.5652, 1653.9472, 1650.3669, 1647.2400, 1644.0108, 1643.6780, 1640.8950, 1638.7089, 1636.1338, 1636.9223] +25-08-28 14:36:07 | D | - best error = [ 1658.5652, 1653.9472, 1650.3669, 1647.2400, 1644.0108, 1643.6780, 1640.8950, 1638.7089, 1636.1338, 1636.1338] +25-08-28 14:36:08 | D | + Adding low-rank branches to transformer_blocks.17.ff_context.net.0.proj +25-08-28 14:36:08 | D | - Calibrating low-rank branch for transformer_blocks.17.ff_context.net.2.linear +25-08-28 14:36:08 | D | + w: sint4 +25-08-28 14:36:08 | D | + x: sint4 +25-08-28 14:36:08 | D | + y: None +25-08-28 14:36:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:36:08 | D | + finished parsing calibration arguments, ram usage: 18.4 +25-08-28 14:36:08 | D | + finished resetting calibrator, ram usage: 18.4 +25-08-28 14:36:09 | D | + finished calculating the original outputs, ram usage: 18.4 +25-08-28 14:36:21 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:36:21 | D | - error = [ 3893.8716, 3870.6924, 3850.4724, 3835.9513, 3829.7919, 3819.8054, 3814.8789, 3814.1780, 3805.1989, 3813.7821] +25-08-28 14:36:21 | D | - best error = [ 3893.8716, 3870.6924, 3850.4724, 3835.9513, 3829.7919, 3819.8054, 3814.8789, 3814.1780, 3805.1989, 3805.1989] +25-08-28 14:36:22 | D | + Adding low-rank branches to transformer_blocks.17.ff_context.net.2.linear +25-08-28 14:36:40 | D | - Calibrating low-rank branches of block transformer_blocks.18 +25-08-28 14:36:40 | D | - Calibrating low-rank branch for transformer_blocks.18.attn.to_q, transformer_blocks.18.attn.to_k, transformer_blocks.18.attn.to_v +25-08-28 14:36:40 | D | + w: sint4 +25-08-28 14:36:40 | D | + x: sint4 +25-08-28 14:36:40 | D | + y: None +25-08-28 14:36:40 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:36:40 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:36:40 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:36:41 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:37:00 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:37:00 | D | - error = [ 5622.7803, 5470.4952, 5394.5914, 5338.6420, 5293.0704, 5271.7160, 5240.7351, 5216.1499, 5213.3778, 5196.6285] +25-08-28 14:37:00 | D | - best error = [ 5622.7803, 5470.4952, 5394.5914, 5338.6420, 5293.0704, 5271.7160, 5240.7351, 5216.1499, 5213.3778, 5196.6285] +25-08-28 14:37:03 | D | - iter = [ 10, 11] +25-08-28 14:37:03 | D | - error = [ 5184.2718, 5198.3428] +25-08-28 14:37:03 | D | - best error = [ 5184.2718, 5184.2718] +25-08-28 14:37:04 | D | + Adding low-rank branches to transformer_blocks.18.attn.to_q, transformer_blocks.18.attn.to_k, transformer_blocks.18.attn.to_v +25-08-28 14:37:04 | D | - Calibrating low-rank branch for transformer_blocks.18.attn.add_q_proj, transformer_blocks.18.attn.add_k_proj, transformer_blocks.18.attn.add_v_proj +25-08-28 14:37:04 | D | + w: sint4 +25-08-28 14:37:04 | D | + x: sint4 +25-08-28 14:37:04 | D | + y: None +25-08-28 14:37:04 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:37:04 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:37:04 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:37:05 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:37:22 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:37:22 | D | - error = [ 2624.0622, 2577.6814, 2530.4897, 2517.2951, 2497.5796, 2497.3318, 2490.4963, 2479.5895, 2474.6824, 2480.6205] +25-08-28 14:37:22 | D | - best error = [ 2624.0622, 2577.6814, 2530.4897, 2517.2951, 2497.5796, 2497.3318, 2490.4963, 2479.5895, 2474.6824, 2474.6824] +25-08-28 14:37:23 | D | + Adding low-rank branches to transformer_blocks.18.attn.add_q_proj, transformer_blocks.18.attn.add_k_proj, transformer_blocks.18.attn.add_v_proj +25-08-28 14:37:23 | D | - Calibrating low-rank branch for transformer_blocks.18.attn.to_out.0 +25-08-28 14:37:23 | D | + w: sint4 +25-08-28 14:37:23 | D | + x: sint4 +25-08-28 14:37:23 | D | + y: None +25-08-28 14:37:23 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:37:23 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:37:23 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:37:24 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:37:39 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:37:39 | D | - error = [ 9661.8019, 9558.2295, 9503.1923, 9460.6190, 9425.7469, 9397.8474, 9384.4514, 9365.0879, 9349.9373, 9338.5087] +25-08-28 14:37:39 | D | - best error = [ 9661.8019, 9558.2295, 9503.1923, 9460.6190, 9425.7469, 9397.8474, 9384.4514, 9365.0879, 9349.9373, 9338.5087] +25-08-28 14:37:52 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18] +25-08-28 14:37:52 | D | - error = [ 9327.9511, 9321.8153, 9319.8918, 9314.3979, 9303.9364, 9296.2358, 9289.8030, 9283.6889, 9284.2193] +25-08-28 14:37:52 | D | - best error = [ 9327.9511, 9321.8153, 9319.8918, 9314.3979, 9303.9364, 9296.2358, 9289.8030, 9283.6889, 9283.6889] +25-08-28 14:37:53 | D | + Adding low-rank branches to transformer_blocks.18.attn.to_out.0 +25-08-28 14:37:53 | D | - Calibrating low-rank branch for transformer_blocks.18.attn.to_add_out +25-08-28 14:37:53 | D | + w: sint4 +25-08-28 14:37:53 | D | + x: sint4 +25-08-28 14:37:53 | D | + y: None +25-08-28 14:37:53 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:37:53 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:37:53 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:37:53 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:38:03 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:38:03 | D | - error = [ 2047.1141, 2013.0447, 1999.7772, 1984.3540, 1973.9741, 1960.9092, 1953.7019, 1942.6209, 1936.5955, 1933.3291] +25-08-28 14:38:03 | D | - best error = [ 2047.1141, 2013.0447, 1999.7772, 1984.3540, 1973.9741, 1960.9092, 1953.7019, 1942.6209, 1936.5955, 1933.3291] +25-08-28 14:38:07 | D | - iter = [ 10, 11, 12, 13] +25-08-28 14:38:07 | D | - error = [ 1930.0447, 1929.4219, 1928.2869, 1928.8104] +25-08-28 14:38:07 | D | - best error = [ 1930.0447, 1929.4219, 1928.2869, 1928.2869] +25-08-28 14:38:08 | D | + Adding low-rank branches to transformer_blocks.18.attn.to_add_out +25-08-28 14:38:08 | D | - Calibrating low-rank branch for transformer_blocks.18.ff.net.0.proj +25-08-28 14:38:08 | D | + w: sint4 +25-08-28 14:38:08 | D | + x: sint4 +25-08-28 14:38:08 | D | + y: None +25-08-28 14:38:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:38:08 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:38:08 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:38:10 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:38:30 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:38:30 | D | - error = [ 6228.5533, 6160.5224, 6108.7671, 6074.5495, 6049.8469, 6028.3985, 6015.1083, 6003.7741, 5994.5258, 5985.9656] +25-08-28 14:38:30 | D | - best error = [ 6228.5533, 6160.5224, 6108.7671, 6074.5495, 6049.8469, 6028.3985, 6015.1083, 6003.7741, 5994.5258, 5985.9656] +25-08-28 14:38:51 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:38:51 | D | - error = [ 5977.7842, 5970.0925, 5965.7274, 5960.6302, 5957.9077, 5954.5512, 5950.8282, 5946.8913, 5943.4949, 5941.8506] +25-08-28 14:38:51 | D | - best error = [ 5977.7842, 5970.0925, 5965.7274, 5960.6302, 5957.9077, 5954.5512, 5950.8282, 5946.8913, 5943.4949, 5941.8506] +25-08-28 14:39:09 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28] +25-08-28 14:39:09 | D | - error = [ 5939.2030, 5937.4425, 5936.1398, 5933.8375, 5930.9510, 5929.2451, 5927.6478, 5925.3047, 5926.1944] +25-08-28 14:39:09 | D | - best error = [ 5939.2030, 5937.4425, 5936.1398, 5933.8375, 5930.9510, 5929.2451, 5927.6478, 5925.3047, 5925.3047] +25-08-28 14:39:09 | D | + Adding low-rank branches to transformer_blocks.18.ff.net.0.proj +25-08-28 14:39:09 | D | - Calibrating low-rank branch for transformer_blocks.18.ff.net.2.linear +25-08-28 14:39:09 | D | + w: sint4 +25-08-28 14:39:09 | D | + x: sint4 +25-08-28 14:39:09 | D | + y: None +25-08-28 14:39:09 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:39:09 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:39:09 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:39:14 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:39:40 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-28 14:39:40 | D | - error = [ 9039.6062, 9014.0357, 9000.7586, 8983.9377, 8968.8617, 8957.7060, 8944.7857, 8933.6599, 8936.4901] +25-08-28 14:39:40 | D | - best error = [ 9039.6062, 9014.0357, 9000.7586, 8983.9377, 8968.8617, 8957.7060, 8944.7857, 8933.6599, 8933.6599] +25-08-28 14:39:41 | D | + Adding low-rank branches to transformer_blocks.18.ff.net.2.linear +25-08-28 14:39:41 | D | - Calibrating low-rank branch for transformer_blocks.18.ff_context.net.0.proj +25-08-28 14:39:41 | D | + w: sint4 +25-08-28 14:39:41 | D | + x: sint4 +25-08-28 14:39:41 | D | + y: None +25-08-28 14:39:41 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:39:41 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:39:41 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:39:41 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:39:53 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:39:53 | D | - error = [ 3160.3215, 3133.6953, 3120.4436, 3112.8169, 3102.6893, 3096.9022, 3092.9651, 3088.8108, 3081.3185, 3080.5423] +25-08-28 14:39:53 | D | - best error = [ 3160.3215, 3133.6953, 3120.4436, 3112.8169, 3102.6893, 3096.9022, 3092.9651, 3088.8108, 3081.3185, 3080.5423] +25-08-28 14:39:57 | D | - iter = [ 10, 11, 12, 13] +25-08-28 14:39:57 | D | - error = [ 3077.1690, 3072.8789, 3069.5125, 3069.6617] +25-08-28 14:39:57 | D | - best error = [ 3077.1690, 3072.8789, 3069.5125, 3069.5125] +25-08-28 14:39:58 | D | + Adding low-rank branches to transformer_blocks.18.ff_context.net.0.proj +25-08-28 14:39:58 | D | - Calibrating low-rank branch for transformer_blocks.18.ff_context.net.2.linear +25-08-28 14:39:58 | D | + w: sint4 +25-08-28 14:39:58 | D | + x: sint4 +25-08-28 14:39:58 | D | + y: None +25-08-28 14:39:58 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:39:58 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:39:58 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:39:59 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:40:10 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-28 14:40:10 | D | - error = [ 6383.2161, 6262.8693, 6234.6092, 6193.4733, 6186.5580, 6156.5497, 6147.7814, 6127.9820, 6140.8895] +25-08-28 14:40:10 | D | - best error = [ 6383.2161, 6262.8693, 6234.6092, 6193.4733, 6186.5580, 6156.5497, 6147.7814, 6127.9820, 6127.9820] +25-08-28 14:40:10 | D | + Adding low-rank branches to transformer_blocks.18.ff_context.net.2.linear +25-08-28 14:40:30 | D | - Calibrating low-rank branches of block single_transformer_blocks.0 +25-08-28 14:40:30 | D | - Calibrating low-rank branch for single_transformer_blocks.0.attn.to_q, single_transformer_blocks.0.attn.to_k, single_transformer_blocks.0.attn.to_v +25-08-28 14:40:30 | D | + w: sint4 +25-08-28 14:40:30 | D | + x: sint4 +25-08-28 14:40:30 | D | + y: None +25-08-28 14:40:30 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:40:30 | D | + finished parsing calibration arguments, ram usage: 18.5 +25-08-28 14:40:30 | D | + finished resetting calibrator, ram usage: 18.5 +25-08-28 14:40:31 | D | + finished calculating the original outputs, ram usage: 18.5 +25-08-28 14:40:53 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:40:53 | D | - error = [ 9748.4333, 9530.0900, 9313.2735, 9305.8236, 9173.2271, 9169.1204, 9158.8331, 9066.8036, 8992.5934, 8968.8220] +25-08-28 14:40:53 | D | - best error = [ 9748.4333, 9530.0900, 9313.2735, 9305.8236, 9173.2271, 9169.1204, 9158.8331, 9066.8036, 8992.5934, 8968.8220] +25-08-28 14:40:57 | D | - iter = [ 10, 11] +25-08-28 14:40:57 | D | - error = [ 8955.1241, 9007.6017] +25-08-28 14:40:57 | D | - best error = [ 8955.1241, 8955.1241] +25-08-28 14:40:58 | D | + Adding low-rank branches to single_transformer_blocks.0.attn.to_q, single_transformer_blocks.0.attn.to_k, single_transformer_blocks.0.attn.to_v +25-08-28 14:40:58 | D | - Calibrating low-rank branch for single_transformer_blocks.0.proj_out.linears.0 +25-08-28 14:40:58 | D | + w: sint4 +25-08-28 14:40:58 | D | + x: sint4 +25-08-28 14:40:58 | D | + y: None +25-08-28 14:40:58 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:40:58 | D | + finished parsing calibration arguments, ram usage: 18.5 +25-08-28 14:40:58 | D | + finished resetting calibrator, ram usage: 18.5 +25-08-28 14:40:59 | D | + finished calculating the original outputs, ram usage: 18.5 +25-08-28 14:41:14 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:41:14 | D | - error = [ 6862.1979, 6801.6469, 6759.1905, 6725.5483, 6697.6671, 6674.4075, 6657.3286, 6653.6255, 6641.8004, 6632.5419] +25-08-28 14:41:14 | D | - best error = [ 6862.1979, 6801.6469, 6759.1905, 6725.5483, 6697.6671, 6674.4075, 6657.3286, 6653.6255, 6641.8004, 6632.5419] +25-08-28 14:41:29 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:41:29 | D | - error = [ 6625.1122, 6621.1564, 6618.3270, 6610.9518, 6606.7111, 6603.0569, 6595.6477, 6594.6942, 6592.4172, 6591.3085] +25-08-28 14:41:29 | D | - best error = [ 6625.1122, 6621.1564, 6618.3270, 6610.9518, 6606.7111, 6603.0569, 6595.6477, 6594.6942, 6592.4172, 6591.3085] +25-08-28 14:41:40 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26] +25-08-28 14:41:40 | D | - error = [ 6585.7243, 6585.5903, 6585.4197, 6584.1498, 6582.6208, 6578.1326, 6580.2313] +25-08-28 14:41:40 | D | - best error = [ 6585.7243, 6585.5903, 6585.4197, 6584.1498, 6582.6208, 6578.1326, 6578.1326] +25-08-28 14:41:40 | D | + Adding low-rank branches to single_transformer_blocks.0.proj_out.linears.0 +25-08-28 14:41:41 | D | - Calibrating low-rank branch for single_transformer_blocks.0.proj_mlp +25-08-28 14:41:41 | D | + w: sint4 +25-08-28 14:41:41 | D | + x: sint4 +25-08-28 14:41:41 | D | + y: None +25-08-28 14:41:41 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:41:41 | D | + finished parsing calibration arguments, ram usage: 18.5 +25-08-28 14:41:41 | D | + finished resetting calibrator, ram usage: 18.5 +25-08-28 14:41:43 | D | + finished calculating the original outputs, ram usage: 18.5 +25-08-28 14:42:04 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:42:04 | D | - error = [ 7038.0887, 6984.6933, 6949.5928, 6925.2974, 6905.3038, 6890.2554, 6878.5685, 6867.7254, 6860.0303, 6855.2047] +25-08-28 14:42:04 | D | - best error = [ 7038.0887, 6984.6933, 6949.5928, 6925.2974, 6905.3038, 6890.2554, 6878.5685, 6867.7254, 6860.0303, 6855.2047] +25-08-28 14:42:26 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:42:26 | D | - error = [ 6849.7352, 6845.5469, 6843.4556, 6838.9645, 6836.9584, 6833.7858, 6830.2264, 6829.0187, 6827.2841, 6825.7348] +25-08-28 14:42:26 | D | - best error = [ 6849.7352, 6845.5469, 6843.4556, 6838.9645, 6836.9584, 6833.7858, 6830.2264, 6829.0187, 6827.2841, 6825.7348] +25-08-28 14:42:48 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 14:42:48 | D | - error = [ 6823.2667, 6820.3844, 6820.2774, 6817.7034, 6816.5499, 6815.1707, 6814.0825, 6813.2075, 6812.0816, 6811.8658] +25-08-28 14:42:48 | D | - best error = [ 6823.2667, 6820.3844, 6820.2774, 6817.7034, 6816.5499, 6815.1707, 6814.0825, 6813.2075, 6812.0816, 6811.8658] +25-08-28 14:43:08 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38] +25-08-28 14:43:08 | D | - error = [ 6811.0052, 6810.1433, 6809.5351, 6808.0834, 6807.3772, 6806.4273, 6805.3738, 6804.3536, 6804.9138] +25-08-28 14:43:08 | D | - best error = [ 6811.0052, 6810.1433, 6809.5351, 6808.0834, 6807.3772, 6806.4273, 6805.3738, 6804.3536, 6804.3536] +25-08-28 14:43:09 | D | + Adding low-rank branches to single_transformer_blocks.0.proj_mlp +25-08-28 14:43:09 | D | - Calibrating low-rank branch for single_transformer_blocks.0.proj_out.linears.1.linear +25-08-28 14:43:09 | D | + w: sint4 +25-08-28 14:43:09 | D | + x: sint4 +25-08-28 14:43:09 | D | + y: None +25-08-28 14:43:09 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:43:09 | D | + finished parsing calibration arguments, ram usage: 18.5 +25-08-28 14:43:09 | D | + finished resetting calibrator, ram usage: 18.5 +25-08-28 14:43:14 | D | + finished calculating the original outputs, ram usage: 18.5 +25-08-28 14:43:47 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:43:47 | D | - error = [14432.2295, 14397.6998, 14374.4919, 14342.5800, 14317.4445, 14309.5638, 14304.1499, 14300.9477, 14293.9394, 14294.0385] +25-08-28 14:43:47 | D | - best error = [14432.2295, 14397.6998, 14374.4919, 14342.5800, 14317.4445, 14309.5638, 14304.1499, 14300.9477, 14293.9394, 14293.9394] +25-08-28 14:43:47 | D | + Adding low-rank branches to single_transformer_blocks.0.proj_out.linears.1.linear +25-08-28 14:44:05 | D | - Calibrating low-rank branches of block single_transformer_blocks.1 +25-08-28 14:44:05 | D | - Calibrating low-rank branch for single_transformer_blocks.1.attn.to_q, single_transformer_blocks.1.attn.to_k, single_transformer_blocks.1.attn.to_v +25-08-28 14:44:06 | D | + w: sint4 +25-08-28 14:44:06 | D | + x: sint4 +25-08-28 14:44:06 | D | + y: None +25-08-28 14:44:06 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:44:06 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:44:06 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:44:07 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:44:23 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 14:44:23 | D | - error = [ 9737.2826, 9483.1218, 9366.0933, 9223.4237, 9202.4471, 9186.5976, 9215.7122] +25-08-28 14:44:23 | D | - best error = [ 9737.2826, 9483.1218, 9366.0933, 9223.4237, 9202.4471, 9186.5976, 9186.5976] +25-08-28 14:44:23 | D | + Adding low-rank branches to single_transformer_blocks.1.attn.to_q, single_transformer_blocks.1.attn.to_k, single_transformer_blocks.1.attn.to_v +25-08-28 14:44:23 | D | - Calibrating low-rank branch for single_transformer_blocks.1.proj_out.linears.0 +25-08-28 14:44:23 | D | + w: sint4 +25-08-28 14:44:23 | D | + x: sint4 +25-08-28 14:44:23 | D | + y: None +25-08-28 14:44:23 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:44:23 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:44:23 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:44:25 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:44:41 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:44:41 | D | - error = [ 5440.9293, 5381.9770, 5348.0673, 5322.6914, 5307.8679, 5290.3331, 5278.5142, 5272.2185, 5262.6659, 5256.6636] +25-08-28 14:44:41 | D | - best error = [ 5440.9293, 5381.9770, 5348.0673, 5322.6914, 5307.8679, 5290.3331, 5278.5142, 5272.2185, 5262.6659, 5256.6636] +25-08-28 14:44:57 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:44:57 | D | - error = [ 5247.4407, 5243.3890, 5240.7816, 5238.7250, 5234.0883, 5230.1424, 5228.3854, 5226.0978, 5222.2076, 5219.1133] +25-08-28 14:44:57 | D | - best error = [ 5247.4407, 5243.3890, 5240.7816, 5238.7250, 5234.0883, 5230.1424, 5228.3854, 5226.0978, 5222.2076, 5219.1133] +25-08-28 14:44:58 | D | - iter = [ 20] +25-08-28 14:44:58 | D | - error = [ 5219.5762] +25-08-28 14:44:58 | D | - best error = [ 5219.1133] +25-08-28 14:44:59 | D | + Adding low-rank branches to single_transformer_blocks.1.proj_out.linears.0 +25-08-28 14:44:59 | D | - Calibrating low-rank branch for single_transformer_blocks.1.proj_mlp +25-08-28 14:44:59 | D | + w: sint4 +25-08-28 14:44:59 | D | + x: sint4 +25-08-28 14:44:59 | D | + y: None +25-08-28 14:44:59 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:44:59 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:44:59 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:45:01 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:45:22 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:45:22 | D | - error = [ 7008.2380, 6960.7505, 6930.4623, 6908.0973, 6889.1207, 6874.9383, 6864.9839, 6857.1781, 6850.1702, 6842.7094] +25-08-28 14:45:22 | D | - best error = [ 7008.2380, 6960.7505, 6930.4623, 6908.0973, 6889.1207, 6874.9383, 6864.9839, 6857.1781, 6850.1702, 6842.7094] +25-08-28 14:45:44 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:45:44 | D | - error = [ 6839.2745, 6834.6766, 6831.6206, 6827.5674, 6826.0481, 6823.1163, 6820.8369, 6819.8038, 6817.2373, 6816.3715] +25-08-28 14:45:44 | D | - best error = [ 6839.2745, 6834.6766, 6831.6206, 6827.5674, 6826.0481, 6823.1163, 6820.8369, 6819.8038, 6817.2373, 6816.3715] +25-08-28 14:46:06 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 14:46:06 | D | - error = [ 6814.8168, 6812.6609, 6811.6967, 6810.0811, 6808.6486, 6807.9800, 6806.8319, 6805.9985, 6804.0917, 6803.1754] +25-08-28 14:46:06 | D | - best error = [ 6814.8168, 6812.6609, 6811.6967, 6810.0811, 6808.6486, 6807.9800, 6806.8319, 6805.9985, 6804.0917, 6803.1754] +25-08-28 14:46:08 | D | - iter = [ 30] +25-08-28 14:46:08 | D | - error = [ 6803.4097] +25-08-28 14:46:08 | D | - best error = [ 6803.1754] +25-08-28 14:46:09 | D | + Adding low-rank branches to single_transformer_blocks.1.proj_mlp +25-08-28 14:46:09 | D | - Calibrating low-rank branch for single_transformer_blocks.1.proj_out.linears.1.linear +25-08-28 14:46:09 | D | + w: sint4 +25-08-28 14:46:09 | D | + x: sint4 +25-08-28 14:46:09 | D | + y: None +25-08-28 14:46:09 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:46:09 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:46:09 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:46:14 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:46:37 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 14:46:37 | D | - error = [15436.5405, 15403.0350, 15378.7963, 15338.2544, 15323.6507, 15305.8697, 15310.2418] +25-08-28 14:46:37 | D | - best error = [15436.5405, 15403.0350, 15378.7963, 15338.2544, 15323.6507, 15305.8697, 15305.8697] +25-08-28 14:46:37 | D | + Adding low-rank branches to single_transformer_blocks.1.proj_out.linears.1.linear +25-08-28 14:46:55 | D | - Calibrating low-rank branches of block single_transformer_blocks.2 +25-08-28 14:46:55 | D | - Calibrating low-rank branch for single_transformer_blocks.2.attn.to_q, single_transformer_blocks.2.attn.to_k, single_transformer_blocks.2.attn.to_v +25-08-28 14:46:55 | D | + w: sint4 +25-08-28 14:46:55 | D | + x: sint4 +25-08-28 14:46:55 | D | + y: None +25-08-28 14:46:55 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:46:55 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:46:55 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:46:57 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:47:12 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 14:47:12 | D | - error = [12166.3727, 11745.2561, 11426.6058, 11225.2056, 11147.1948, 11082.9097, 11113.3966] +25-08-28 14:47:12 | D | - best error = [12166.3727, 11745.2561, 11426.6058, 11225.2056, 11147.1948, 11082.9097, 11082.9097] +25-08-28 14:47:12 | D | + Adding low-rank branches to single_transformer_blocks.2.attn.to_q, single_transformer_blocks.2.attn.to_k, single_transformer_blocks.2.attn.to_v +25-08-28 14:47:12 | D | - Calibrating low-rank branch for single_transformer_blocks.2.proj_out.linears.0 +25-08-28 14:47:12 | D | + w: sint4 +25-08-28 14:47:12 | D | + x: sint4 +25-08-28 14:47:12 | D | + y: None +25-08-28 14:47:12 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:47:12 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:47:12 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:47:14 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:47:29 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:47:29 | D | - error = [ 5335.0554, 5281.3216, 5244.5892, 5219.1842, 5209.6265, 5195.4164, 5184.1194, 5172.6011, 5167.2842, 5161.7948] +25-08-28 14:47:29 | D | - best error = [ 5335.0554, 5281.3216, 5244.5892, 5219.1842, 5209.6265, 5195.4164, 5184.1194, 5172.6011, 5167.2842, 5161.7948] +25-08-28 14:47:43 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:47:43 | D | - error = [ 5159.2713, 5153.0760, 5148.8832, 5145.1157, 5140.9834, 5139.8601, 5137.1850, 5134.0056, 5128.2829, 5127.9470] +25-08-28 14:47:43 | D | - best error = [ 5159.2713, 5153.0760, 5148.8832, 5145.1157, 5140.9834, 5139.8601, 5137.1850, 5134.0056, 5128.2829, 5127.9470] +25-08-28 14:47:59 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 14:47:59 | D | - error = [ 5126.2688, 5122.8164, 5120.9030, 5119.3177, 5119.1841, 5118.3138, 5116.4523, 5114.0258, 5113.7990, 5113.0199] +25-08-28 14:47:59 | D | - best error = [ 5126.2688, 5122.8164, 5120.9030, 5119.3177, 5119.1841, 5118.3138, 5116.4523, 5114.0258, 5113.7990, 5113.0199] +25-08-28 14:48:03 | D | - iter = [ 30, 31] +25-08-28 14:48:03 | D | - error = [ 5111.3928, 5112.3441] +25-08-28 14:48:03 | D | - best error = [ 5111.3928, 5111.3928] +25-08-28 14:48:03 | D | + Adding low-rank branches to single_transformer_blocks.2.proj_out.linears.0 +25-08-28 14:48:03 | D | - Calibrating low-rank branch for single_transformer_blocks.2.proj_mlp +25-08-28 14:48:03 | D | + w: sint4 +25-08-28 14:48:03 | D | + x: sint4 +25-08-28 14:48:03 | D | + y: None +25-08-28 14:48:03 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:48:03 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:48:03 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:48:05 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:48:26 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:48:26 | D | - error = [ 8146.9427, 8076.8878, 8027.9195, 7996.4752, 7970.3937, 7952.4618, 7938.4152, 7926.6028, 7917.7065, 7910.2324] +25-08-28 14:48:26 | D | - best error = [ 8146.9427, 8076.8878, 8027.9195, 7996.4752, 7970.3937, 7952.4618, 7938.4152, 7926.6028, 7917.7065, 7910.2324] +25-08-28 14:48:48 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:48:48 | D | - error = [ 7901.6072, 7896.2069, 7889.9062, 7886.6794, 7880.6403, 7876.4558, 7873.5651, 7871.0213, 7868.5381, 7866.8230] +25-08-28 14:48:48 | D | - best error = [ 7901.6072, 7896.2069, 7889.9062, 7886.6794, 7880.6403, 7876.4558, 7873.5651, 7871.0213, 7868.5381, 7866.8230] +25-08-28 14:49:10 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 14:49:10 | D | - error = [ 7864.8008, 7862.3063, 7860.6348, 7858.1764, 7856.3287, 7854.4713, 7853.6662, 7853.4701, 7851.2134, 7849.7144] +25-08-28 14:49:10 | D | - best error = [ 7864.8008, 7862.3063, 7860.6348, 7858.1764, 7856.3287, 7854.4713, 7853.6662, 7853.4701, 7851.2134, 7849.7144] +25-08-28 14:49:30 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38] +25-08-28 14:49:30 | D | - error = [ 7848.2645, 7847.4890, 7846.2585, 7845.7390, 7844.4181, 7844.1377, 7842.9265, 7842.3767, 7843.4360] +25-08-28 14:49:30 | D | - best error = [ 7848.2645, 7847.4890, 7846.2585, 7845.7390, 7844.4181, 7844.1377, 7842.9265, 7842.3767, 7842.3767] +25-08-28 14:49:30 | D | + Adding low-rank branches to single_transformer_blocks.2.proj_mlp +25-08-28 14:49:30 | D | - Calibrating low-rank branch for single_transformer_blocks.2.proj_out.linears.1.linear +25-08-28 14:49:30 | D | + w: sint4 +25-08-28 14:49:30 | D | + x: sint4 +25-08-28 14:49:30 | D | + y: None +25-08-28 14:49:30 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:49:30 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:49:30 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:49:35 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:49:54 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-28 14:49:55 | D | - error = [14715.7179, 14685.7269, 14675.9231, 14651.0472, 14640.9590, 14641.7835] +25-08-28 14:49:55 | D | - best error = [14715.7179, 14685.7269, 14675.9231, 14651.0472, 14640.9590, 14640.9590] +25-08-28 14:49:55 | D | + Adding low-rank branches to single_transformer_blocks.2.proj_out.linears.1.linear +25-08-28 14:50:13 | D | - Calibrating low-rank branches of block single_transformer_blocks.3 +25-08-28 14:50:13 | D | - Calibrating low-rank branch for single_transformer_blocks.3.attn.to_q, single_transformer_blocks.3.attn.to_k, single_transformer_blocks.3.attn.to_v +25-08-28 14:50:13 | D | + w: sint4 +25-08-28 14:50:13 | D | + x: sint4 +25-08-28 14:50:13 | D | + y: None +25-08-28 14:50:13 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:50:13 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:50:13 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:50:14 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:50:25 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-28 14:50:25 | D | - error = [13090.6000, 12876.0988, 12604.1536, 12424.5872, 12462.8658] +25-08-28 14:50:25 | D | - best error = [13090.6000, 12876.0988, 12604.1536, 12424.5872, 12424.5872] +25-08-28 14:50:25 | D | + Adding low-rank branches to single_transformer_blocks.3.attn.to_q, single_transformer_blocks.3.attn.to_k, single_transformer_blocks.3.attn.to_v +25-08-28 14:50:26 | D | - Calibrating low-rank branch for single_transformer_blocks.3.proj_out.linears.0 +25-08-28 14:50:26 | D | + w: sint4 +25-08-28 14:50:26 | D | + x: sint4 +25-08-28 14:50:26 | D | + y: None +25-08-28 14:50:26 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:50:26 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:50:26 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:50:26 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:50:42 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:50:42 | D | - error = [ 5903.8943, 5844.1057, 5807.6147, 5781.1511, 5755.1308, 5741.4075, 5728.1899, 5719.5106, 5709.6655, 5702.6378] +25-08-28 14:50:42 | D | - best error = [ 5903.8943, 5844.1057, 5807.6147, 5781.1511, 5755.1308, 5741.4075, 5728.1899, 5719.5106, 5709.6655, 5702.6378] +25-08-28 14:50:56 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:50:56 | D | - error = [ 5699.6712, 5694.4153, 5689.0679, 5683.8392, 5679.2632, 5676.1246, 5673.8703, 5673.0318, 5670.8493, 5665.4802] +25-08-28 14:50:56 | D | - best error = [ 5699.6712, 5694.4153, 5689.0679, 5683.8392, 5679.2632, 5676.1246, 5673.8703, 5673.0318, 5670.8493, 5665.4802] +25-08-28 14:51:04 | D | - iter = [ 20, 21, 22, 23, 24] +25-08-28 14:51:04 | D | - error = [ 5662.8375, 5660.7964, 5658.1891, 5656.4462, 5656.7475] +25-08-28 14:51:04 | D | - best error = [ 5662.8375, 5660.7964, 5658.1891, 5656.4462, 5656.4462] +25-08-28 14:51:04 | D | + Adding low-rank branches to single_transformer_blocks.3.proj_out.linears.0 +25-08-28 14:51:04 | D | - Calibrating low-rank branch for single_transformer_blocks.3.proj_mlp +25-08-28 14:51:04 | D | + w: sint4 +25-08-28 14:51:04 | D | + x: sint4 +25-08-28 14:51:04 | D | + y: None +25-08-28 14:51:04 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:51:04 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:51:04 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:51:06 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:51:28 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:51:28 | D | - error = [ 7305.1858, 7246.2832, 7207.0707, 7177.3338, 7153.1540, 7135.8565, 7121.2718, 7112.2342, 7101.7908, 7094.4795] +25-08-28 14:51:28 | D | - best error = [ 7305.1858, 7246.2832, 7207.0707, 7177.3338, 7153.1540, 7135.8565, 7121.2718, 7112.2342, 7101.7908, 7094.4795] +25-08-28 14:51:50 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:51:50 | D | - error = [ 7087.5319, 7080.8161, 7076.8875, 7073.6002, 7070.8311, 7067.4377, 7064.2871, 7061.7114, 7058.5718, 7056.6989] +25-08-28 14:51:50 | D | - best error = [ 7087.5319, 7080.8161, 7076.8875, 7073.6002, 7070.8311, 7067.4377, 7064.2871, 7061.7114, 7058.5718, 7056.6989] +25-08-28 14:52:12 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 14:52:12 | D | - error = [ 7055.2500, 7052.2499, 7049.9552, 7048.3189, 7046.0738, 7044.5034, 7043.9284, 7043.2981, 7043.0934, 7041.7421] +25-08-28 14:52:12 | D | - best error = [ 7055.2500, 7052.2499, 7049.9552, 7048.3189, 7046.0738, 7044.5034, 7043.9284, 7043.2981, 7043.0934, 7041.7421] +25-08-28 14:52:33 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-28 14:52:33 | D | - error = [ 7040.4932, 7039.2198, 7038.4251, 7037.7499, 7036.9539, 7036.4090, 7035.3306, 7034.0097, 7033.1396, 7033.0068] +25-08-28 14:52:33 | D | - best error = [ 7040.4932, 7039.2198, 7038.4251, 7037.7499, 7036.9539, 7036.4090, 7035.3306, 7034.0097, 7033.1396, 7033.0068] +25-08-28 14:52:38 | D | - iter = [ 40, 41] +25-08-28 14:52:38 | D | - error = [ 7032.4457, 7033.1597] +25-08-28 14:52:38 | D | - best error = [ 7032.4457, 7032.4457] +25-08-28 14:52:38 | D | + Adding low-rank branches to single_transformer_blocks.3.proj_mlp +25-08-28 14:52:38 | D | - Calibrating low-rank branch for single_transformer_blocks.3.proj_out.linears.1.linear +25-08-28 14:52:38 | D | + w: sint4 +25-08-28 14:52:38 | D | + x: sint4 +25-08-28 14:52:38 | D | + y: None +25-08-28 14:52:38 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:52:38 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:52:38 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:52:43 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:53:15 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:53:15 | D | - error = [ 2615.3502, 2607.9638, 2603.3607, 2598.0107, 2595.5501, 2594.2032, 2593.2179, 2591.5435, 2589.9743, 2587.6647] +25-08-28 14:53:15 | D | - best error = [ 2615.3502, 2607.9638, 2603.3607, 2598.0107, 2595.5501, 2594.2032, 2593.2179, 2591.5435, 2589.9743, 2587.6647] +25-08-28 14:53:34 | D | - iter = [ 10, 11, 12, 13, 14, 15] +25-08-28 14:53:34 | D | - error = [ 2585.5353, 2584.6384, 2583.8003, 2581.9643, 2579.9114, 2583.0491] +25-08-28 14:53:34 | D | - best error = [ 2585.5353, 2584.6384, 2583.8003, 2581.9643, 2579.9114, 2579.9114] +25-08-28 14:53:35 | D | + Adding low-rank branches to single_transformer_blocks.3.proj_out.linears.1.linear +25-08-28 14:53:53 | D | - Calibrating low-rank branches of block single_transformer_blocks.4 +25-08-28 14:53:53 | D | - Calibrating low-rank branch for single_transformer_blocks.4.attn.to_q, single_transformer_blocks.4.attn.to_k, single_transformer_blocks.4.attn.to_v +25-08-28 14:53:53 | D | + w: sint4 +25-08-28 14:53:53 | D | + x: sint4 +25-08-28 14:53:53 | D | + y: None +25-08-28 14:53:53 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:53:53 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:53:53 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:53:54 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:54:03 | D | - iter = [ 0, 1, 2, 3] +25-08-28 14:54:03 | D | - error = [13807.9305, 13760.0663, 13648.7438, 13649.2069] +25-08-28 14:54:03 | D | - best error = [13807.9305, 13760.0663, 13648.7438, 13648.7438] +25-08-28 14:54:03 | D | + Adding low-rank branches to single_transformer_blocks.4.attn.to_q, single_transformer_blocks.4.attn.to_k, single_transformer_blocks.4.attn.to_v +25-08-28 14:54:03 | D | - Calibrating low-rank branch for single_transformer_blocks.4.proj_out.linears.0 +25-08-28 14:54:03 | D | + w: sint4 +25-08-28 14:54:03 | D | + x: sint4 +25-08-28 14:54:03 | D | + y: None +25-08-28 14:54:03 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:54:03 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:54:03 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:54:04 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:54:19 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:54:19 | D | - error = [ 5941.0477, 5874.6962, 5835.0225, 5811.8916, 5794.4014, 5775.4248, 5766.7535, 5755.2967, 5748.5682, 5741.3906] +25-08-28 14:54:19 | D | - best error = [ 5941.0477, 5874.6962, 5835.0225, 5811.8916, 5794.4014, 5775.4248, 5766.7535, 5755.2967, 5748.5682, 5741.3906] +25-08-28 14:54:34 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:54:34 | D | - error = [ 5732.7226, 5725.3395, 5723.6691, 5717.9939, 5713.9664, 5710.7831, 5707.5666, 5706.2986, 5705.9970, 5702.6162] +25-08-28 14:54:34 | D | - best error = [ 5732.7226, 5725.3395, 5723.6691, 5717.9939, 5713.9664, 5710.7831, 5707.5666, 5706.2986, 5705.9970, 5702.6162] +25-08-28 14:54:37 | D | - iter = [ 20, 21] +25-08-28 14:54:37 | D | - error = [ 5698.7995, 5699.3912] +25-08-28 14:54:37 | D | - best error = [ 5698.7995, 5698.7995] +25-08-28 14:54:37 | D | + Adding low-rank branches to single_transformer_blocks.4.proj_out.linears.0 +25-08-28 14:54:37 | D | - Calibrating low-rank branch for single_transformer_blocks.4.proj_mlp +25-08-28 14:54:37 | D | + w: sint4 +25-08-28 14:54:37 | D | + x: sint4 +25-08-28 14:54:37 | D | + y: None +25-08-28 14:54:37 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:54:37 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:54:37 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:54:39 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:55:00 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:55:00 | D | - error = [ 6950.7404, 6898.9796, 6865.0669, 6840.1544, 6823.3229, 6808.3237, 6797.9629, 6786.4807, 6778.3786, 6770.6567] +25-08-28 14:55:00 | D | - best error = [ 6950.7404, 6898.9796, 6865.0669, 6840.1544, 6823.3229, 6808.3237, 6797.9629, 6786.4807, 6778.3786, 6770.6567] +25-08-28 14:55:22 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:55:22 | D | - error = [ 6765.1406, 6759.5541, 6754.8412, 6752.2684, 6748.0480, 6743.8873, 6741.9244, 6739.4558, 6737.5476, 6734.5206] +25-08-28 14:55:22 | D | - best error = [ 6765.1406, 6759.5541, 6754.8412, 6752.2684, 6748.0480, 6743.8873, 6741.9244, 6739.4558, 6737.5476, 6734.5206] +25-08-28 14:55:39 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27] +25-08-28 14:55:39 | D | - error = [ 6733.8420, 6731.6277, 6730.5596, 6728.4560, 6727.3213, 6725.5480, 6724.8007, 6725.7582] +25-08-28 14:55:39 | D | - best error = [ 6733.8420, 6731.6277, 6730.5596, 6728.4560, 6727.3213, 6725.5480, 6724.8007, 6724.8007] +25-08-28 14:55:40 | D | + Adding low-rank branches to single_transformer_blocks.4.proj_mlp +25-08-28 14:55:40 | D | - Calibrating low-rank branch for single_transformer_blocks.4.proj_out.linears.1.linear +25-08-28 14:55:40 | D | + w: sint4 +25-08-28 14:55:40 | D | + x: sint4 +25-08-28 14:55:40 | D | + y: None +25-08-28 14:55:40 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:55:40 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:55:40 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:55:45 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:56:14 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-28 14:56:14 | D | - error = [12872.7421, 12852.8503, 12834.6188, 12817.6241, 12784.1927, 12775.0995, 12758.6238, 12752.4015, 12757.1376] +25-08-28 14:56:14 | D | - best error = [12872.7421, 12852.8503, 12834.6188, 12817.6241, 12784.1927, 12775.0995, 12758.6238, 12752.4015, 12752.4015] +25-08-28 14:56:14 | D | + Adding low-rank branches to single_transformer_blocks.4.proj_out.linears.1.linear +25-08-28 14:56:33 | D | - Calibrating low-rank branches of block single_transformer_blocks.5 +25-08-28 14:56:33 | D | - Calibrating low-rank branch for single_transformer_blocks.5.attn.to_q, single_transformer_blocks.5.attn.to_k, single_transformer_blocks.5.attn.to_v +25-08-28 14:56:33 | D | + w: sint4 +25-08-28 14:56:33 | D | + x: sint4 +25-08-28 14:56:33 | D | + y: None +25-08-28 14:56:33 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:56:33 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:56:33 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:56:34 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:56:51 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-28 14:56:51 | D | - error = [14328.4534, 13987.2113, 13980.6209, 13828.3419, 13737.6836, 13695.1022, 13568.2836, 13675.1260] +25-08-28 14:56:51 | D | - best error = [14328.4534, 13987.2113, 13980.6209, 13828.3419, 13737.6836, 13695.1022, 13568.2836, 13568.2836] +25-08-28 14:56:52 | D | + Adding low-rank branches to single_transformer_blocks.5.attn.to_q, single_transformer_blocks.5.attn.to_k, single_transformer_blocks.5.attn.to_v +25-08-28 14:56:52 | D | - Calibrating low-rank branch for single_transformer_blocks.5.proj_out.linears.0 +25-08-28 14:56:52 | D | + w: sint4 +25-08-28 14:56:52 | D | + x: sint4 +25-08-28 14:56:52 | D | + y: None +25-08-28 14:56:52 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:56:52 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:56:52 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:56:53 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:57:08 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:57:08 | D | - error = [ 5099.1301, 5054.1684, 5017.0471, 4994.8762, 4975.5024, 4959.4980, 4950.7945, 4943.9577, 4937.0588, 4929.6406] +25-08-28 14:57:08 | D | - best error = [ 5099.1301, 5054.1684, 5017.0471, 4994.8762, 4975.5024, 4959.4980, 4950.7945, 4943.9577, 4937.0588, 4929.6406] +25-08-28 14:57:23 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:57:23 | D | - error = [ 4925.9742, 4920.4451, 4917.1770, 4912.8483, 4910.5466, 4907.8845, 4905.2918, 4903.6988, 4901.3235, 4900.3443] +25-08-28 14:57:23 | D | - best error = [ 4925.9742, 4920.4451, 4917.1770, 4912.8483, 4910.5466, 4907.8845, 4905.2918, 4903.6988, 4901.3235, 4900.3443] +25-08-28 14:57:39 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 14:57:39 | D | - error = [ 4898.5620, 4896.4262, 4896.0972, 4894.1087, 4892.7162, 4891.8420, 4888.8369, 4887.8711, 4886.3189, 4886.0082] +25-08-28 14:57:39 | D | - best error = [ 4898.5620, 4896.4262, 4896.0972, 4894.1087, 4892.7162, 4891.8420, 4888.8369, 4887.8711, 4886.3189, 4886.0082] +25-08-28 14:57:40 | D | - iter = [ 30] +25-08-28 14:57:40 | D | - error = [ 4886.4091] +25-08-28 14:57:40 | D | - best error = [ 4886.0082] +25-08-28 14:57:41 | D | + Adding low-rank branches to single_transformer_blocks.5.proj_out.linears.0 +25-08-28 14:57:41 | D | - Calibrating low-rank branch for single_transformer_blocks.5.proj_mlp +25-08-28 14:57:41 | D | + w: sint4 +25-08-28 14:57:41 | D | + x: sint4 +25-08-28 14:57:41 | D | + y: None +25-08-28 14:57:41 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:57:41 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:57:41 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:57:43 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:58:04 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:58:04 | D | - error = [ 6977.3436, 6913.7263, 6873.6799, 6846.2851, 6826.9528, 6811.4598, 6798.4372, 6787.6818, 6775.9053, 6769.6795] +25-08-28 14:58:04 | D | - best error = [ 6977.3436, 6913.7263, 6873.6799, 6846.2851, 6826.9528, 6811.4598, 6798.4372, 6787.6818, 6775.9053, 6769.6795] +25-08-28 14:58:26 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 14:58:26 | D | - error = [ 6764.0920, 6758.3383, 6754.2493, 6750.6263, 6745.9333, 6742.5915, 6740.1733, 6738.4559, 6736.2925, 6733.2070] +25-08-28 14:58:26 | D | - best error = [ 6764.0920, 6758.3383, 6754.2493, 6750.6263, 6745.9333, 6742.5915, 6740.1733, 6738.4559, 6736.2925, 6733.2070] +25-08-28 14:58:48 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 14:58:48 | D | - error = [ 6730.1848, 6728.4801, 6727.0053, 6725.9798, 6724.1434, 6723.6026, 6722.2865, 6721.1416, 6720.1705, 6720.3768] +25-08-28 14:58:48 | D | - best error = [ 6730.1848, 6728.4801, 6727.0053, 6725.9798, 6724.1434, 6723.6026, 6722.2865, 6721.1416, 6720.1705, 6720.1705] +25-08-28 14:58:48 | D | + Adding low-rank branches to single_transformer_blocks.5.proj_mlp +25-08-28 14:58:48 | D | - Calibrating low-rank branch for single_transformer_blocks.5.proj_out.linears.1.linear +25-08-28 14:58:48 | D | + w: sint4 +25-08-28 14:58:48 | D | + x: sint4 +25-08-28 14:58:48 | D | + y: None +25-08-28 14:58:48 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:58:48 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:58:48 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:58:53 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:59:25 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 14:59:25 | D | - error = [13845.0130, 13817.3769, 13781.2632, 13755.3574, 13748.5918, 13721.4816, 13714.5511, 13709.2191, 13696.3849, 13689.5154] +25-08-28 14:59:25 | D | - best error = [13845.0130, 13817.3769, 13781.2632, 13755.3574, 13748.5918, 13721.4816, 13714.5511, 13709.2191, 13696.3849, 13689.5154] +25-08-28 14:59:29 | D | - iter = [ 10] +25-08-28 14:59:29 | D | - error = [13692.4037] +25-08-28 14:59:29 | D | - best error = [13689.5154] +25-08-28 14:59:29 | D | + Adding low-rank branches to single_transformer_blocks.5.proj_out.linears.1.linear +25-08-28 14:59:47 | D | - Calibrating low-rank branches of block single_transformer_blocks.6 +25-08-28 14:59:47 | D | - Calibrating low-rank branch for single_transformer_blocks.6.attn.to_q, single_transformer_blocks.6.attn.to_k, single_transformer_blocks.6.attn.to_v +25-08-28 14:59:47 | D | + w: sint4 +25-08-28 14:59:47 | D | + x: sint4 +25-08-28 14:59:47 | D | + y: None +25-08-28 14:59:47 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:59:47 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:59:47 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:59:49 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 14:59:57 | D | - iter = [ 0, 1, 2, 3] +25-08-28 14:59:57 | D | - error = [17340.3871, 17152.9170, 16830.2756, 17105.0458] +25-08-28 14:59:57 | D | - best error = [17340.3871, 17152.9170, 16830.2756, 16830.2756] +25-08-28 14:59:58 | D | + Adding low-rank branches to single_transformer_blocks.6.attn.to_q, single_transformer_blocks.6.attn.to_k, single_transformer_blocks.6.attn.to_v +25-08-28 14:59:58 | D | - Calibrating low-rank branch for single_transformer_blocks.6.proj_out.linears.0 +25-08-28 14:59:58 | D | + w: sint4 +25-08-28 14:59:58 | D | + x: sint4 +25-08-28 14:59:58 | D | + y: None +25-08-28 14:59:58 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 14:59:58 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 14:59:58 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 14:59:59 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 15:00:14 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:00:14 | D | - error = [ 4604.6598, 4557.9000, 4527.3847, 4500.7924, 4484.4217, 4472.2171, 4462.1443, 4454.5307, 4448.8882, 4439.8275] +25-08-28 15:00:14 | D | - best error = [ 4604.6598, 4557.9000, 4527.3847, 4500.7924, 4484.4217, 4472.2171, 4462.1443, 4454.5307, 4448.8882, 4439.8275] +25-08-28 15:00:29 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:00:29 | D | - error = [ 4435.0085, 4430.7444, 4427.1982, 4424.6433, 4420.8230, 4418.2951, 4417.9185, 4416.1807, 4413.2260, 4411.0618] +25-08-28 15:00:29 | D | - best error = [ 4435.0085, 4430.7444, 4427.1982, 4424.6433, 4420.8230, 4418.2951, 4417.9185, 4416.1807, 4413.2260, 4411.0618] +25-08-28 15:00:30 | D | - iter = [ 20] +25-08-28 15:00:30 | D | - error = [ 4411.7715] +25-08-28 15:00:30 | D | - best error = [ 4411.0618] +25-08-28 15:00:30 | D | + Adding low-rank branches to single_transformer_blocks.6.proj_out.linears.0 +25-08-28 15:00:31 | D | - Calibrating low-rank branch for single_transformer_blocks.6.proj_mlp +25-08-28 15:00:31 | D | + w: sint4 +25-08-28 15:00:31 | D | + x: sint4 +25-08-28 15:00:31 | D | + y: None +25-08-28 15:00:31 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:00:31 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 15:00:31 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 15:00:32 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 15:00:54 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:00:54 | D | - error = [ 6824.7116, 6775.6535, 6740.9833, 6716.6641, 6702.3339, 6689.4933, 6676.9017, 6667.8815, 6661.5133, 6655.3209] +25-08-28 15:00:54 | D | - best error = [ 6824.7116, 6775.6535, 6740.9833, 6716.6641, 6702.3339, 6689.4933, 6676.9017, 6667.8815, 6661.5133, 6655.3209] +25-08-28 15:01:16 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:01:16 | D | - error = [ 6649.1281, 6644.3501, 6639.1072, 6637.0662, 6631.5724, 6629.7864, 6628.6234, 6625.4295, 6623.6132, 6622.7668] +25-08-28 15:01:16 | D | - best error = [ 6649.1281, 6644.3501, 6639.1072, 6637.0662, 6631.5724, 6629.7864, 6628.6234, 6625.4295, 6623.6132, 6622.7668] +25-08-28 15:01:37 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 15:01:37 | D | - error = [ 6620.8788, 6618.8522, 6618.7126, 6617.4853, 6616.3618, 6615.7068, 6614.3508, 6613.5742, 6611.7728, 6611.7275] +25-08-28 15:01:37 | D | - best error = [ 6620.8788, 6618.8522, 6618.7126, 6617.4853, 6616.3618, 6615.7068, 6614.3508, 6613.5742, 6611.7728, 6611.7275] +25-08-28 15:01:52 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36] +25-08-28 15:01:52 | D | - error = [ 6611.3354, 6610.0560, 6609.1034, 6607.9089, 6606.1054, 6605.9295, 6606.2785] +25-08-28 15:01:52 | D | - best error = [ 6611.3354, 6610.0560, 6609.1034, 6607.9089, 6606.1054, 6605.9295, 6605.9295] +25-08-28 15:01:53 | D | + Adding low-rank branches to single_transformer_blocks.6.proj_mlp +25-08-28 15:01:53 | D | - Calibrating low-rank branch for single_transformer_blocks.6.proj_out.linears.1.linear +25-08-28 15:01:53 | D | + w: sint4 +25-08-28 15:01:53 | D | + x: sint4 +25-08-28 15:01:53 | D | + y: None +25-08-28 15:01:53 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:01:53 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 15:01:53 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 15:01:58 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 15:02:23 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-28 15:02:23 | D | - error = [12892.1396, 12860.7664, 12835.2278, 12822.0011, 12817.6241, 12811.4130, 12796.1207, 12799.3594] +25-08-28 15:02:23 | D | - best error = [12892.1396, 12860.7664, 12835.2278, 12822.0011, 12817.6241, 12811.4130, 12796.1207, 12796.1207] +25-08-28 15:02:24 | D | + Adding low-rank branches to single_transformer_blocks.6.proj_out.linears.1.linear +25-08-28 15:02:42 | D | - Calibrating low-rank branches of block single_transformer_blocks.7 +25-08-28 15:02:42 | D | - Calibrating low-rank branch for single_transformer_blocks.7.attn.to_q, single_transformer_blocks.7.attn.to_k, single_transformer_blocks.7.attn.to_v +25-08-28 15:02:42 | D | + w: sint4 +25-08-28 15:02:42 | D | + x: sint4 +25-08-28 15:02:42 | D | + y: None +25-08-28 15:02:42 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:02:42 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 15:02:42 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 15:02:43 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 15:02:57 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-28 15:02:57 | D | - error = [18095.7719, 17805.5200, 17360.0774, 17296.7715, 17098.3714, 17118.3373] +25-08-28 15:02:57 | D | - best error = [18095.7719, 17805.5200, 17360.0774, 17296.7715, 17098.3714, 17098.3714] +25-08-28 15:02:57 | D | + Adding low-rank branches to single_transformer_blocks.7.attn.to_q, single_transformer_blocks.7.attn.to_k, single_transformer_blocks.7.attn.to_v +25-08-28 15:02:57 | D | - Calibrating low-rank branch for single_transformer_blocks.7.proj_out.linears.0 +25-08-28 15:02:57 | D | + w: sint4 +25-08-28 15:02:57 | D | + x: sint4 +25-08-28 15:02:57 | D | + y: None +25-08-28 15:02:57 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:02:57 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 15:02:57 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 15:02:58 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 15:03:13 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:03:13 | D | - error = [ 4515.4668, 4465.6623, 4438.0063, 4417.1484, 4402.3457, 4389.6241, 4380.3397, 4372.6838, 4364.9598, 4357.8775] +25-08-28 15:03:13 | D | - best error = [ 4515.4668, 4465.6623, 4438.0063, 4417.1484, 4402.3457, 4389.6241, 4380.3397, 4372.6838, 4364.9598, 4357.8775] +25-08-28 15:03:28 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:03:28 | D | - error = [ 4353.2252, 4350.7121, 4346.9378, 4343.0998, 4338.3591, 4336.8509, 4335.1136, 4333.9822, 4330.1686, 4328.2391] +25-08-28 15:03:28 | D | - best error = [ 4353.2252, 4350.7121, 4346.9378, 4343.0998, 4338.3591, 4336.8509, 4335.1136, 4333.9822, 4330.1686, 4328.2391] +25-08-28 15:03:37 | D | - iter = [ 20, 21, 22, 23, 24, 25] +25-08-28 15:03:37 | D | - error = [ 4326.5788, 4325.2003, 4324.7335, 4323.2668, 4321.3685, 4321.4315] +25-08-28 15:03:37 | D | - best error = [ 4326.5788, 4325.2003, 4324.7335, 4323.2668, 4321.3685, 4321.3685] +25-08-28 15:03:38 | D | + Adding low-rank branches to single_transformer_blocks.7.proj_out.linears.0 +25-08-28 15:03:38 | D | - Calibrating low-rank branch for single_transformer_blocks.7.proj_mlp +25-08-28 15:03:38 | D | + w: sint4 +25-08-28 15:03:38 | D | + x: sint4 +25-08-28 15:03:38 | D | + y: None +25-08-28 15:03:38 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:03:38 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 15:03:38 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 15:03:40 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 15:04:02 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:04:02 | D | - error = [ 6981.4394, 6930.2141, 6896.9776, 6875.6347, 6858.0656, 6845.4697, 6834.0304, 6825.1142, 6815.1003, 6810.0073] +25-08-28 15:04:02 | D | - best error = [ 6981.4394, 6930.2141, 6896.9776, 6875.6347, 6858.0656, 6845.4697, 6834.0304, 6825.1142, 6815.1003, 6810.0073] +25-08-28 15:04:24 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:04:24 | D | - error = [ 6805.2315, 6800.4029, 6795.5865, 6793.3794, 6789.4889, 6786.6996, 6783.4947, 6779.5849, 6776.7842, 6775.3007] +25-08-28 15:04:24 | D | - best error = [ 6805.2315, 6800.4029, 6795.5865, 6793.3794, 6789.4889, 6786.6996, 6783.4947, 6779.5849, 6776.7842, 6775.3007] +25-08-28 15:04:37 | D | - iter = [ 20, 21, 22, 23, 24, 25] +25-08-28 15:04:37 | D | - error = [ 6774.0965, 6773.9793, 6772.5608, 6771.5850, 6770.9831, 6771.1018] +25-08-28 15:04:37 | D | - best error = [ 6774.0965, 6773.9793, 6772.5608, 6771.5850, 6770.9831, 6770.9831] +25-08-28 15:04:37 | D | + Adding low-rank branches to single_transformer_blocks.7.proj_mlp +25-08-28 15:04:37 | D | - Calibrating low-rank branch for single_transformer_blocks.7.proj_out.linears.1.linear +25-08-28 15:04:37 | D | + w: sint4 +25-08-28 15:04:37 | D | + x: sint4 +25-08-28 15:04:37 | D | + y: None +25-08-28 15:04:37 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:04:37 | D | + finished parsing calibration arguments, ram usage: 15.5 +25-08-28 15:04:37 | D | + finished resetting calibrator, ram usage: 15.5 +25-08-28 15:04:42 | D | + finished calculating the original outputs, ram usage: 16.4 +25-08-28 15:05:05 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 15:05:05 | D | - error = [13864.6538, 13824.4491, 13795.0379, 13739.0550, 13736.2504, 13728.7686, 13733.3707] +25-08-28 15:05:05 | D | - best error = [13864.6538, 13824.4491, 13795.0379, 13739.0550, 13736.2504, 13728.7686, 13728.7686] +25-08-28 15:05:05 | D | + Adding low-rank branches to single_transformer_blocks.7.proj_out.linears.1.linear +25-08-28 15:05:23 | D | - Calibrating low-rank branches of block single_transformer_blocks.8 +25-08-28 15:05:23 | D | - Calibrating low-rank branch for single_transformer_blocks.8.attn.to_q, single_transformer_blocks.8.attn.to_k, single_transformer_blocks.8.attn.to_v +25-08-28 15:05:23 | D | + w: sint4 +25-08-28 15:05:23 | D | + x: sint4 +25-08-28 15:05:23 | D | + y: None +25-08-28 15:05:23 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:05:23 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 15:05:23 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 15:05:25 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 15:05:35 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-28 15:05:35 | D | - error = [19857.8774, 19168.5910, 19040.1580, 18631.2400, 18745.0823] +25-08-28 15:05:35 | D | - best error = [19857.8774, 19168.5910, 19040.1580, 18631.2400, 18631.2400] +25-08-28 15:05:36 | D | + Adding low-rank branches to single_transformer_blocks.8.attn.to_q, single_transformer_blocks.8.attn.to_k, single_transformer_blocks.8.attn.to_v +25-08-28 15:05:36 | D | - Calibrating low-rank branch for single_transformer_blocks.8.proj_out.linears.0 +25-08-28 15:05:36 | D | + w: sint4 +25-08-28 15:05:36 | D | + x: sint4 +25-08-28 15:05:36 | D | + y: None +25-08-28 15:05:36 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:05:36 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 15:05:36 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 15:05:37 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 15:05:52 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:05:52 | D | - error = [ 5286.1300, 5232.8721, 5202.5863, 5177.0242, 5157.9356, 5145.9464, 5135.8047, 5123.7463, 5116.4200, 5110.9083] +25-08-28 15:05:52 | D | - best error = [ 5286.1300, 5232.8721, 5202.5863, 5177.0242, 5157.9356, 5145.9464, 5135.8047, 5123.7463, 5116.4200, 5110.9083] +25-08-28 15:06:07 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:06:07 | D | - error = [ 5105.9491, 5099.2350, 5095.7724, 5088.6381, 5086.7837, 5085.7072, 5082.6076, 5079.4559, 5076.4631, 5076.9609] +25-08-28 15:06:07 | D | - best error = [ 5105.9491, 5099.2350, 5095.7724, 5088.6381, 5086.7837, 5085.7072, 5082.6076, 5079.4559, 5076.4631, 5076.4631] +25-08-28 15:06:07 | D | + Adding low-rank branches to single_transformer_blocks.8.proj_out.linears.0 +25-08-28 15:06:07 | D | - Calibrating low-rank branch for single_transformer_blocks.8.proj_mlp +25-08-28 15:06:07 | D | + w: sint4 +25-08-28 15:06:07 | D | + x: sint4 +25-08-28 15:06:07 | D | + y: None +25-08-28 15:06:07 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:06:07 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 15:06:07 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 15:06:09 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 15:06:30 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:06:30 | D | - error = [ 6698.8736, 6647.8075, 6614.7892, 6591.6933, 6575.4793, 6560.2098, 6547.4062, 6538.4815, 6530.6373, 6527.1718] +25-08-28 15:06:30 | D | - best error = [ 6698.8736, 6647.8075, 6614.7892, 6591.6933, 6575.4793, 6560.2098, 6547.4062, 6538.4815, 6530.6373, 6527.1718] +25-08-28 15:06:52 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:06:52 | D | - error = [ 6522.1886, 6517.2810, 6513.3512, 6509.3576, 6504.4600, 6501.9348, 6500.4738, 6498.0379, 6496.3734, 6495.5997] +25-08-28 15:06:52 | D | - best error = [ 6522.1886, 6517.2810, 6513.3512, 6509.3576, 6504.4600, 6501.9348, 6500.4738, 6498.0379, 6496.3734, 6495.5997] +25-08-28 15:07:07 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26] +25-08-28 15:07:07 | D | - error = [ 6492.8555, 6492.0524, 6491.1509, 6488.3172, 6486.4335, 6486.0897, 6486.2504] +25-08-28 15:07:07 | D | - best error = [ 6492.8555, 6492.0524, 6491.1509, 6488.3172, 6486.4335, 6486.0897, 6486.0897] +25-08-28 15:07:07 | D | + Adding low-rank branches to single_transformer_blocks.8.proj_mlp +25-08-28 15:07:08 | D | - Calibrating low-rank branch for single_transformer_blocks.8.proj_out.linears.1.linear +25-08-28 15:07:08 | D | + w: sint4 +25-08-28 15:07:08 | D | + x: sint4 +25-08-28 15:07:08 | D | + y: None +25-08-28 15:07:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:07:08 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 15:07:08 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 15:07:13 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 15:07:54 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:07:54 | D | - error = [13149.3957, 13117.7114, 13092.5121, 13073.5207, 13054.6801, 13042.5077, 13034.9966, 13023.2340, 13007.3530, 12996.5682] +25-08-28 15:07:54 | D | - best error = [13149.3957, 13117.7114, 13092.5121, 13073.5207, 13054.6801, 13042.5077, 13034.9966, 13023.2340, 13007.3530, 12996.5682] +25-08-28 15:08:02 | D | - iter = [ 10, 11] +25-08-28 15:08:02 | D | - error = [12985.4762, 12986.4058] +25-08-28 15:08:02 | D | - best error = [12985.4762, 12985.4762] +25-08-28 15:08:03 | D | + Adding low-rank branches to single_transformer_blocks.8.proj_out.linears.1.linear +25-08-28 15:08:21 | D | - Calibrating low-rank branches of block single_transformer_blocks.9 +25-08-28 15:08:21 | D | - Calibrating low-rank branch for single_transformer_blocks.9.attn.to_q, single_transformer_blocks.9.attn.to_k, single_transformer_blocks.9.attn.to_v +25-08-28 15:08:21 | D | + w: sint4 +25-08-28 15:08:21 | D | + x: sint4 +25-08-28 15:08:21 | D | + y: None +25-08-28 15:08:21 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:08:21 | D | + finished parsing calibration arguments, ram usage: 17.0 +25-08-28 15:08:21 | D | + finished resetting calibrator, ram usage: 17.0 +25-08-28 15:08:23 | D | + finished calculating the original outputs, ram usage: 17.2 +25-08-28 15:08:36 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-28 15:08:36 | D | - error = [23011.0088, 21949.3665, 21182.1389, 21166.9251, 20971.4534, 21028.2971] +25-08-28 15:08:36 | D | - best error = [23011.0088, 21949.3665, 21182.1389, 21166.9251, 20971.4534, 20971.4534] +25-08-28 15:08:36 | D | + Adding low-rank branches to single_transformer_blocks.9.attn.to_q, single_transformer_blocks.9.attn.to_k, single_transformer_blocks.9.attn.to_v +25-08-28 15:08:36 | D | - Calibrating low-rank branch for single_transformer_blocks.9.proj_out.linears.0 +25-08-28 15:08:36 | D | + w: sint4 +25-08-28 15:08:36 | D | + x: sint4 +25-08-28 15:08:36 | D | + y: None +25-08-28 15:08:36 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:08:36 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 15:08:36 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 15:08:37 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 15:08:52 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:08:52 | D | - error = [ 4513.2084, 4475.8990, 4450.7981, 4425.4080, 4407.3060, 4397.9457, 4381.7841, 4376.6549, 4375.1917, 4369.6620] +25-08-28 15:08:52 | D | - best error = [ 4513.2084, 4475.8990, 4450.7981, 4425.4080, 4407.3060, 4397.9457, 4381.7841, 4376.6549, 4375.1917, 4369.6620] +25-08-28 15:08:58 | D | - iter = [ 10, 11, 12, 13] +25-08-28 15:08:58 | D | - error = [ 4364.3900, 4356.9645, 4350.5563, 4351.9963] +25-08-28 15:08:58 | D | - best error = [ 4364.3900, 4356.9645, 4350.5563, 4350.5563] +25-08-28 15:08:58 | D | + Adding low-rank branches to single_transformer_blocks.9.proj_out.linears.0 +25-08-28 15:08:59 | D | - Calibrating low-rank branch for single_transformer_blocks.9.proj_mlp +25-08-28 15:08:59 | D | + w: sint4 +25-08-28 15:08:59 | D | + x: sint4 +25-08-28 15:08:59 | D | + y: None +25-08-28 15:08:59 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:08:59 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:08:59 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:09:01 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 15:09:22 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:09:22 | D | - error = [ 6619.3338, 6570.4012, 6541.9101, 6520.1046, 6502.6702, 6489.2104, 6478.1806, 6470.2065, 6463.3168, 6457.3304] +25-08-28 15:09:22 | D | - best error = [ 6619.3338, 6570.4012, 6541.9101, 6520.1046, 6502.6702, 6489.2104, 6478.1806, 6470.2065, 6463.3168, 6457.3304] +25-08-28 15:09:46 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:09:46 | D | - error = [ 6453.0384, 6446.8200, 6440.8602, 6437.8263, 6434.9145, 6432.4742, 6429.2031, 6425.5605, 6425.1581, 6422.6627] +25-08-28 15:09:46 | D | - best error = [ 6453.0384, 6446.8200, 6440.8602, 6437.8263, 6434.9145, 6432.4742, 6429.2031, 6425.5605, 6425.1581, 6422.6627] +25-08-28 15:10:05 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28] +25-08-28 15:10:05 | D | - error = [ 6420.8006, 6419.7283, 6416.4669, 6415.0931, 6414.5631, 6414.4799, 6413.4930, 6413.4852, 6413.7019] +25-08-28 15:10:05 | D | - best error = [ 6420.8006, 6419.7283, 6416.4669, 6415.0931, 6414.5631, 6414.4799, 6413.4930, 6413.4852, 6413.4852] +25-08-28 15:10:05 | D | + Adding low-rank branches to single_transformer_blocks.9.proj_mlp +25-08-28 15:10:05 | D | - Calibrating low-rank branch for single_transformer_blocks.9.proj_out.linears.1.linear +25-08-28 15:10:05 | D | + w: sint4 +25-08-28 15:10:05 | D | + x: sint4 +25-08-28 15:10:05 | D | + y: None +25-08-28 15:10:05 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:10:05 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 15:10:05 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 15:10:11 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 15:10:31 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-28 15:10:31 | D | - error = [12648.9639, 12606.3870, 12586.4104, 12566.1539, 12559.5216, 12560.0822] +25-08-28 15:10:31 | D | - best error = [12648.9639, 12606.3870, 12586.4104, 12566.1539, 12559.5216, 12559.5216] +25-08-28 15:10:31 | D | + Adding low-rank branches to single_transformer_blocks.9.proj_out.linears.1.linear +25-08-28 15:10:49 | D | - Calibrating low-rank branches of block single_transformer_blocks.10 +25-08-28 15:10:49 | D | - Calibrating low-rank branch for single_transformer_blocks.10.attn.to_q, single_transformer_blocks.10.attn.to_k, single_transformer_blocks.10.attn.to_v +25-08-28 15:10:49 | D | + w: sint4 +25-08-28 15:10:49 | D | + x: sint4 +25-08-28 15:10:49 | D | + y: None +25-08-28 15:10:49 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:10:49 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 15:10:49 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 15:10:51 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 15:11:11 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-28 15:11:11 | D | - error = [24992.7298, 24906.1457, 24180.0632, 23974.0446, 23903.2128, 23740.0886, 23683.6105, 23531.7481, 23572.2766] +25-08-28 15:11:11 | D | - best error = [24992.7298, 24906.1457, 24180.0632, 23974.0446, 23903.2128, 23740.0886, 23683.6105, 23531.7481, 23531.7481] +25-08-28 15:11:11 | D | + Adding low-rank branches to single_transformer_blocks.10.attn.to_q, single_transformer_blocks.10.attn.to_k, single_transformer_blocks.10.attn.to_v +25-08-28 15:11:11 | D | - Calibrating low-rank branch for single_transformer_blocks.10.proj_out.linears.0 +25-08-28 15:11:11 | D | + w: sint4 +25-08-28 15:11:11 | D | + x: sint4 +25-08-28 15:11:11 | D | + y: None +25-08-28 15:11:11 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:11:11 | D | + finished parsing calibration arguments, ram usage: 16.4 +25-08-28 15:11:11 | D | + finished resetting calibrator, ram usage: 16.4 +25-08-28 15:11:13 | D | + finished calculating the original outputs, ram usage: 16.7 +25-08-28 15:11:28 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:11:28 | D | - error = [ 3762.6414, 3726.2270, 3701.2098, 3685.8117, 3667.4677, 3661.0212, 3650.4260, 3645.0193, 3641.0667, 3633.8784] +25-08-28 15:11:28 | D | - best error = [ 3762.6414, 3726.2270, 3701.2098, 3685.8117, 3667.4677, 3661.0212, 3650.4260, 3645.0193, 3641.0667, 3633.8784] +25-08-28 15:11:33 | D | - iter = [ 10, 11, 12] +25-08-28 15:11:33 | D | - error = [ 3631.6944, 3628.8453, 3629.7102] +25-08-28 15:11:33 | D | - best error = [ 3631.6944, 3628.8453, 3628.8453] +25-08-28 15:11:33 | D | + Adding low-rank branches to single_transformer_blocks.10.proj_out.linears.0 +25-08-28 15:11:33 | D | - Calibrating low-rank branch for single_transformer_blocks.10.proj_mlp +25-08-28 15:11:33 | D | + w: sint4 +25-08-28 15:11:33 | D | + x: sint4 +25-08-28 15:11:33 | D | + y: None +25-08-28 15:11:33 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:11:33 | D | + finished parsing calibration arguments, ram usage: 18.1 +25-08-28 15:11:33 | D | + finished resetting calibrator, ram usage: 18.1 +25-08-28 15:11:35 | D | + finished calculating the original outputs, ram usage: 18.1 +25-08-28 15:11:57 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:11:57 | D | - error = [ 8942.9973, 8919.1896, 8902.3907, 8890.7840, 8883.5025, 8878.7571, 8874.1350, 8872.1163, 8868.3157, 8865.0496] +25-08-28 15:11:57 | D | - best error = [ 8942.9973, 8919.1896, 8902.3907, 8890.7840, 8883.5025, 8878.7571, 8874.1350, 8872.1163, 8868.3157, 8865.0496] +25-08-28 15:12:05 | D | - iter = [ 10, 11, 12, 13] +25-08-28 15:12:05 | D | - error = [ 8861.8982, 8859.9634, 8858.7037, 8859.3309] +25-08-28 15:12:05 | D | - best error = [ 8861.8982, 8859.9634, 8858.7037, 8858.7037] +25-08-28 15:12:06 | D | + Adding low-rank branches to single_transformer_blocks.10.proj_mlp +25-08-28 15:12:06 | D | - Calibrating low-rank branch for single_transformer_blocks.10.proj_out.linears.1.linear +25-08-28 15:12:06 | D | + w: sint4 +25-08-28 15:12:06 | D | + x: sint4 +25-08-28 15:12:06 | D | + y: None +25-08-28 15:12:06 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:12:06 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:12:06 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:12:11 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:12:44 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:12:44 | D | - error = [12453.6452, 12408.4487, 12387.1624, 12375.5475, 12369.0174, 12341.6349, 12337.3229, 12324.6238, 12319.7929, 12330.5507] +25-08-28 15:12:44 | D | - best error = [12453.6452, 12408.4487, 12387.1624, 12375.5475, 12369.0174, 12341.6349, 12337.3229, 12324.6238, 12319.7929, 12319.7929] +25-08-28 15:12:44 | D | + Adding low-rank branches to single_transformer_blocks.10.proj_out.linears.1.linear +25-08-28 15:13:02 | D | - Calibrating low-rank branches of block single_transformer_blocks.11 +25-08-28 15:13:02 | D | - Calibrating low-rank branch for single_transformer_blocks.11.attn.to_q, single_transformer_blocks.11.attn.to_k, single_transformer_blocks.11.attn.to_v +25-08-28 15:13:02 | D | + w: sint4 +25-08-28 15:13:02 | D | + x: sint4 +25-08-28 15:13:02 | D | + y: None +25-08-28 15:13:02 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:13:02 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:13:02 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:13:03 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:13:25 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:13:25 | D | - error = [26877.3165, 26204.2855, 25784.2172, 25431.3985, 25182.7115, 24937.1485, 24764.0682, 24750.4835, 24674.4850, 24564.3957] +25-08-28 15:13:25 | D | - best error = [26877.3165, 26204.2855, 25784.2172, 25431.3985, 25182.7115, 24937.1485, 24764.0682, 24750.4835, 24674.4850, 24564.3957] +25-08-28 15:13:27 | D | - iter = [ 10] +25-08-28 15:13:27 | D | - error = [24602.5547] +25-08-28 15:13:27 | D | - best error = [24564.3957] +25-08-28 15:13:28 | D | + Adding low-rank branches to single_transformer_blocks.11.attn.to_q, single_transformer_blocks.11.attn.to_k, single_transformer_blocks.11.attn.to_v +25-08-28 15:13:28 | D | - Calibrating low-rank branch for single_transformer_blocks.11.proj_out.linears.0 +25-08-28 15:13:28 | D | + w: sint4 +25-08-28 15:13:28 | D | + x: sint4 +25-08-28 15:13:28 | D | + y: None +25-08-28 15:13:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:13:28 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:13:28 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:13:29 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:13:44 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:13:44 | D | - error = [ 4330.8847, 4287.9417, 4258.7482, 4237.4292, 4222.1158, 4213.7083, 4203.9986, 4197.1786, 4191.7459, 4184.2216] +25-08-28 15:13:44 | D | - best error = [ 4330.8847, 4287.9417, 4258.7482, 4237.4292, 4222.1158, 4213.7083, 4203.9986, 4197.1786, 4191.7459, 4184.2216] +25-08-28 15:14:00 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:14:00 | D | - error = [ 4179.6997, 4175.6394, 4174.2362, 4171.2753, 4167.9470, 4166.0382, 4164.3717, 4163.4493, 4162.8759, 4164.0444] +25-08-28 15:14:00 | D | - best error = [ 4179.6997, 4175.6394, 4174.2362, 4171.2753, 4167.9470, 4166.0382, 4164.3717, 4163.4493, 4162.8759, 4162.8759] +25-08-28 15:14:00 | D | + Adding low-rank branches to single_transformer_blocks.11.proj_out.linears.0 +25-08-28 15:14:00 | D | - Calibrating low-rank branch for single_transformer_blocks.11.proj_mlp +25-08-28 15:14:00 | D | + w: sint4 +25-08-28 15:14:00 | D | + x: sint4 +25-08-28 15:14:00 | D | + y: None +25-08-28 15:14:00 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:14:00 | D | + finished parsing calibration arguments, ram usage: 16.6 +25-08-28 15:14:00 | D | + finished resetting calibrator, ram usage: 16.6 +25-08-28 15:14:02 | D | + finished calculating the original outputs, ram usage: 16.9 +25-08-28 15:14:23 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:14:23 | D | - error = [ 6575.5438, 6526.3377, 6493.0443, 6469.3369, 6450.4704, 6436.4270, 6427.0055, 6417.3066, 6410.6901, 6403.2198] +25-08-28 15:14:23 | D | - best error = [ 6575.5438, 6526.3377, 6493.0443, 6469.3369, 6450.4704, 6436.4270, 6427.0055, 6417.3066, 6410.6901, 6403.2198] +25-08-28 15:14:45 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:14:45 | D | - error = [ 6398.3370, 6393.8830, 6390.0507, 6385.6492, 6381.3673, 6378.8234, 6376.1543, 6373.9002, 6372.3864, 6371.0837] +25-08-28 15:14:45 | D | - best error = [ 6398.3370, 6393.8830, 6390.0507, 6385.6492, 6381.3673, 6378.8234, 6376.1543, 6373.9002, 6372.3864, 6371.0837] +25-08-28 15:15:07 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 15:15:07 | D | - error = [ 6368.8442, 6368.1297, 6365.6374, 6364.9701, 6364.1229, 6363.3686, 6362.9141, 6362.0978, 6360.6537, 6358.3812] +25-08-28 15:15:07 | D | - best error = [ 6368.8442, 6368.1297, 6365.6374, 6364.9701, 6364.1229, 6363.3686, 6362.9141, 6362.0978, 6360.6537, 6358.3812] +25-08-28 15:15:10 | D | - iter = [ 30] +25-08-28 15:15:10 | D | - error = [ 6358.4353] +25-08-28 15:15:10 | D | - best error = [ 6358.3812] +25-08-28 15:15:10 | D | + Adding low-rank branches to single_transformer_blocks.11.proj_mlp +25-08-28 15:15:10 | D | - Calibrating low-rank branch for single_transformer_blocks.11.proj_out.linears.1.linear +25-08-28 15:15:10 | D | + w: sint4 +25-08-28 15:15:10 | D | + x: sint4 +25-08-28 15:15:10 | D | + y: None +25-08-28 15:15:10 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:15:10 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:15:10 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:15:15 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:15:29 | D | - iter = [ 0, 1, 2, 3] +25-08-28 15:15:29 | D | - error = [12785.7834, 12760.6708, 12731.6437, 12732.0320] +25-08-28 15:15:29 | D | - best error = [12785.7834, 12760.6708, 12731.6437, 12731.6437] +25-08-28 15:15:30 | D | + Adding low-rank branches to single_transformer_blocks.11.proj_out.linears.1.linear +25-08-28 15:15:48 | D | - Calibrating low-rank branches of block single_transformer_blocks.12 +25-08-28 15:15:48 | D | - Calibrating low-rank branch for single_transformer_blocks.12.attn.to_q, single_transformer_blocks.12.attn.to_k, single_transformer_blocks.12.attn.to_v +25-08-28 15:15:48 | D | + w: sint4 +25-08-28 15:15:48 | D | + x: sint4 +25-08-28 15:15:48 | D | + y: None +25-08-28 15:15:48 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:15:48 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:15:48 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:15:49 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:16:09 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-28 15:16:09 | D | - error = [28987.0669, 27693.2812, 27486.6251, 27042.4721, 26782.1744, 26647.4656, 26644.0718, 26613.7516, 26661.7941] +25-08-28 15:16:09 | D | - best error = [28987.0669, 27693.2812, 27486.6251, 27042.4721, 26782.1744, 26647.4656, 26644.0718, 26613.7516, 26613.7516] +25-08-28 15:16:09 | D | + Adding low-rank branches to single_transformer_blocks.12.attn.to_q, single_transformer_blocks.12.attn.to_k, single_transformer_blocks.12.attn.to_v +25-08-28 15:16:09 | D | - Calibrating low-rank branch for single_transformer_blocks.12.proj_out.linears.0 +25-08-28 15:16:09 | D | + w: sint4 +25-08-28 15:16:09 | D | + x: sint4 +25-08-28 15:16:09 | D | + y: None +25-08-28 15:16:09 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:16:09 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:16:09 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:16:10 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:16:25 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:16:25 | D | - error = [ 4434.8353, 4393.4212, 4369.8426, 4352.9404, 4341.1155, 4326.8016, 4319.7252, 4313.9539, 4306.8244, 4302.1818] +25-08-28 15:16:25 | D | - best error = [ 4434.8353, 4393.4212, 4369.8426, 4352.9404, 4341.1155, 4326.8016, 4319.7252, 4313.9539, 4306.8244, 4302.1818] +25-08-28 15:16:27 | D | - iter = [ 10] +25-08-28 15:16:27 | D | - error = [ 4302.6424] +25-08-28 15:16:27 | D | - best error = [ 4302.1818] +25-08-28 15:16:27 | D | + Adding low-rank branches to single_transformer_blocks.12.proj_out.linears.0 +25-08-28 15:16:27 | D | - Calibrating low-rank branch for single_transformer_blocks.12.proj_mlp +25-08-28 15:16:27 | D | + w: sint4 +25-08-28 15:16:27 | D | + x: sint4 +25-08-28 15:16:27 | D | + y: None +25-08-28 15:16:27 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:16:27 | D | + finished parsing calibration arguments, ram usage: 17.4 +25-08-28 15:16:27 | D | + finished resetting calibrator, ram usage: 17.4 +25-08-28 15:16:29 | D | + finished calculating the original outputs, ram usage: 17.7 +25-08-28 15:16:50 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:16:50 | D | - error = [ 6750.2625, 6714.0402, 6692.8153, 6676.0162, 6663.2082, 6652.8075, 6645.6614, 6639.3180, 6632.2256, 6627.6020] +25-08-28 15:16:50 | D | - best error = [ 6750.2625, 6714.0402, 6692.8153, 6676.0162, 6663.2082, 6652.8075, 6645.6614, 6639.3180, 6632.2256, 6627.6020] +25-08-28 15:17:12 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:17:12 | D | - error = [ 6624.0785, 6618.7606, 6616.3549, 6614.2516, 6611.1494, 6608.2079, 6607.5747, 6604.6708, 6603.1923, 6602.0491] +25-08-28 15:17:12 | D | - best error = [ 6624.0785, 6618.7606, 6616.3549, 6614.2516, 6611.1494, 6608.2079, 6607.5747, 6604.6708, 6603.1923, 6602.0491] +25-08-28 15:17:34 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 15:17:34 | D | - error = [ 6601.2896, 6600.2048, 6599.6845, 6597.7311, 6596.8311, 6595.9424, 6595.2795, 6595.0685, 6594.0585, 6593.4844] +25-08-28 15:17:34 | D | - best error = [ 6601.2896, 6600.2048, 6599.6845, 6597.7311, 6596.8311, 6595.9424, 6595.2795, 6595.0685, 6594.0585, 6593.4844] +25-08-28 15:17:42 | D | - iter = [ 30, 31, 32, 33] +25-08-28 15:17:42 | D | - error = [ 6591.6578, 6591.5491, 6590.6476, 6590.9647] +25-08-28 15:17:42 | D | - best error = [ 6591.6578, 6591.5491, 6590.6476, 6590.6476] +25-08-28 15:17:43 | D | + Adding low-rank branches to single_transformer_blocks.12.proj_mlp +25-08-28 15:17:43 | D | - Calibrating low-rank branch for single_transformer_blocks.12.proj_out.linears.1.linear +25-08-28 15:17:43 | D | + w: sint4 +25-08-28 15:17:43 | D | + x: sint4 +25-08-28 15:17:43 | D | + y: None +25-08-28 15:17:43 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:17:43 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:17:43 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:17:48 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:18:22 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:18:22 | D | - error = [11797.8859, 11790.1620, 11771.9089, 11757.7978, 11746.3833, 11728.1644, 11724.1337, 11723.5878, 11706.7147, 11710.2840] +25-08-28 15:18:22 | D | - best error = [11797.8859, 11790.1620, 11771.9089, 11757.7978, 11746.3833, 11728.1644, 11724.1337, 11723.5878, 11706.7147, 11706.7147] +25-08-28 15:18:22 | D | + Adding low-rank branches to single_transformer_blocks.12.proj_out.linears.1.linear +25-08-28 15:18:41 | D | - Calibrating low-rank branches of block single_transformer_blocks.13 +25-08-28 15:18:41 | D | - Calibrating low-rank branch for single_transformer_blocks.13.attn.to_q, single_transformer_blocks.13.attn.to_k, single_transformer_blocks.13.attn.to_v +25-08-28 15:18:41 | D | + w: sint4 +25-08-28 15:18:41 | D | + x: sint4 +25-08-28 15:18:41 | D | + y: None +25-08-28 15:18:41 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:18:41 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:18:41 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:18:42 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:18:58 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 15:18:58 | D | - error = [32907.1401, 32121.8420, 31642.1319, 31327.5751, 31180.7346, 30873.9745, 30925.4577] +25-08-28 15:18:58 | D | - best error = [32907.1401, 32121.8420, 31642.1319, 31327.5751, 31180.7346, 30873.9745, 30873.9745] +25-08-28 15:18:58 | D | + Adding low-rank branches to single_transformer_blocks.13.attn.to_q, single_transformer_blocks.13.attn.to_k, single_transformer_blocks.13.attn.to_v +25-08-28 15:18:58 | D | - Calibrating low-rank branch for single_transformer_blocks.13.proj_out.linears.0 +25-08-28 15:18:58 | D | + w: sint4 +25-08-28 15:18:58 | D | + x: sint4 +25-08-28 15:18:58 | D | + y: None +25-08-28 15:18:58 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:18:58 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:18:58 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:19:00 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:19:14 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:19:14 | D | - error = [ 4530.4680, 4485.6681, 4454.1031, 4434.0309, 4423.8302, 4411.4032, 4395.3858, 4387.1398, 4380.4918, 4372.8341] +25-08-28 15:19:14 | D | - best error = [ 4530.4680, 4485.6681, 4454.1031, 4434.0309, 4423.8302, 4411.4032, 4395.3858, 4387.1398, 4380.4918, 4372.8341] +25-08-28 15:19:30 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:19:30 | D | - error = [ 4370.0343, 4369.3256, 4365.0162, 4362.3251, 4357.1679, 4355.0065, 4352.9287, 4352.0478, 4349.2898, 4348.2022] +25-08-28 15:19:30 | D | - best error = [ 4370.0343, 4369.3256, 4365.0162, 4362.3251, 4357.1679, 4355.0065, 4352.9287, 4352.0478, 4349.2898, 4348.2022] +25-08-28 15:19:36 | D | - iter = [ 20, 21, 22, 23] +25-08-28 15:19:36 | D | - error = [ 4347.6476, 4345.2190, 4345.2107, 4346.2319] +25-08-28 15:19:36 | D | - best error = [ 4347.6476, 4345.2190, 4345.2107, 4345.2107] +25-08-28 15:19:36 | D | + Adding low-rank branches to single_transformer_blocks.13.proj_out.linears.0 +25-08-28 15:19:36 | D | - Calibrating low-rank branch for single_transformer_blocks.13.proj_mlp +25-08-28 15:19:36 | D | + w: sint4 +25-08-28 15:19:36 | D | + x: sint4 +25-08-28 15:19:36 | D | + y: None +25-08-28 15:19:36 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:19:36 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:19:36 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:19:38 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:20:00 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:20:00 | D | - error = [ 6578.4062, 6541.7001, 6521.8767, 6504.8508, 6491.1241, 6482.7217, 6475.7925, 6469.0825, 6464.0996, 6459.5641] +25-08-28 15:20:00 | D | - best error = [ 6578.4062, 6541.7001, 6521.8767, 6504.8508, 6491.1241, 6482.7217, 6475.7925, 6469.0825, 6464.0996, 6459.5641] +25-08-28 15:20:22 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:20:22 | D | - error = [ 6456.3603, 6453.7955, 6451.1593, 6448.5605, 6444.8950, 6442.8286, 6440.5335, 6438.5315, 6437.4461, 6435.7250] +25-08-28 15:20:22 | D | - best error = [ 6456.3603, 6453.7955, 6451.1593, 6448.5605, 6444.8950, 6442.8286, 6440.5335, 6438.5315, 6437.4461, 6435.7250] +25-08-28 15:20:37 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26] +25-08-28 15:20:37 | D | - error = [ 6434.0786, 6431.3656, 6431.0456, 6429.4871, 6429.1153, 6428.1487, 6428.2912] +25-08-28 15:20:37 | D | - best error = [ 6434.0786, 6431.3656, 6431.0456, 6429.4871, 6429.1153, 6428.1487, 6428.1487] +25-08-28 15:20:38 | D | + Adding low-rank branches to single_transformer_blocks.13.proj_mlp +25-08-28 15:20:38 | D | - Calibrating low-rank branch for single_transformer_blocks.13.proj_out.linears.1.linear +25-08-28 15:20:38 | D | + w: sint4 +25-08-28 15:20:38 | D | + x: sint4 +25-08-28 15:20:38 | D | + y: None +25-08-28 15:20:38 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:20:38 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:20:38 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:20:43 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:21:09 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-28 15:21:09 | D | - error = [12776.9198, 12752.4053, 12693.6053, 12673.5151, 12649.2321, 12632.6118, 12621.6512, 12625.7084] +25-08-28 15:21:09 | D | - best error = [12776.9198, 12752.4053, 12693.6053, 12673.5151, 12649.2321, 12632.6118, 12621.6512, 12621.6512] +25-08-28 15:21:09 | D | + Adding low-rank branches to single_transformer_blocks.13.proj_out.linears.1.linear +25-08-28 15:21:28 | D | - Calibrating low-rank branches of block single_transformer_blocks.14 +25-08-28 15:21:28 | D | - Calibrating low-rank branch for single_transformer_blocks.14.attn.to_q, single_transformer_blocks.14.attn.to_k, single_transformer_blocks.14.attn.to_v +25-08-28 15:21:28 | D | + w: sint4 +25-08-28 15:21:28 | D | + x: sint4 +25-08-28 15:21:28 | D | + y: None +25-08-28 15:21:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:21:28 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:21:28 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:21:29 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:21:53 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:21:53 | D | - error = [30494.8405, 29856.4094, 29407.4951, 28930.7742, 28717.4172, 28712.0067, 28640.1620, 28613.6985, 28549.8200, 28532.3519] +25-08-28 15:21:53 | D | - best error = [30494.8405, 29856.4094, 29407.4951, 28930.7742, 28717.4172, 28712.0067, 28640.1620, 28613.6985, 28549.8200, 28532.3519] +25-08-28 15:21:55 | D | - iter = [ 10] +25-08-28 15:21:55 | D | - error = [28552.4875] +25-08-28 15:21:55 | D | - best error = [28532.3519] +25-08-28 15:21:55 | D | + Adding low-rank branches to single_transformer_blocks.14.attn.to_q, single_transformer_blocks.14.attn.to_k, single_transformer_blocks.14.attn.to_v +25-08-28 15:21:55 | D | - Calibrating low-rank branch for single_transformer_blocks.14.proj_out.linears.0 +25-08-28 15:21:55 | D | + w: sint4 +25-08-28 15:21:55 | D | + x: sint4 +25-08-28 15:21:55 | D | + y: None +25-08-28 15:21:55 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:21:55 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:21:55 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:21:57 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:22:12 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:22:12 | D | - error = [ 5567.3620, 5523.8800, 5492.8319, 5466.8243, 5451.8584, 5436.4190, 5421.9675, 5415.4080, 5407.6586, 5398.4561] +25-08-28 15:22:12 | D | - best error = [ 5567.3620, 5523.8800, 5492.8319, 5466.8243, 5451.8584, 5436.4190, 5421.9675, 5415.4080, 5407.6586, 5398.4561] +25-08-28 15:22:26 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18] +25-08-28 15:22:26 | D | - error = [ 5390.4529, 5385.1722, 5380.7490, 5376.8889, 5374.2345, 5370.3884, 5366.5702, 5365.3790, 5366.4018] +25-08-28 15:22:26 | D | - best error = [ 5390.4529, 5385.1722, 5380.7490, 5376.8889, 5374.2345, 5370.3884, 5366.5702, 5365.3790, 5365.3790] +25-08-28 15:22:27 | D | + Adding low-rank branches to single_transformer_blocks.14.proj_out.linears.0 +25-08-28 15:22:27 | D | - Calibrating low-rank branch for single_transformer_blocks.14.proj_mlp +25-08-28 15:22:27 | D | + w: sint4 +25-08-28 15:22:27 | D | + x: sint4 +25-08-28 15:22:27 | D | + y: None +25-08-28 15:22:27 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:22:27 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:22:27 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:22:29 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:22:51 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:22:51 | D | - error = [ 6438.4489, 6407.3233, 6388.2523, 6373.5064, 6362.7724, 6354.3881, 6348.1087, 6342.8467, 6336.9835, 6332.0995] +25-08-28 15:22:51 | D | - best error = [ 6438.4489, 6407.3233, 6388.2523, 6373.5064, 6362.7724, 6354.3881, 6348.1087, 6342.8467, 6336.9835, 6332.0995] +25-08-28 15:23:13 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:23:13 | D | - error = [ 6328.6090, 6325.2219, 6323.3036, 6320.9657, 6318.3925, 6316.2619, 6315.5899, 6313.8194, 6311.9179, 6310.5844] +25-08-28 15:23:13 | D | - best error = [ 6328.6090, 6325.2219, 6323.3036, 6320.9657, 6318.3925, 6316.2619, 6315.5899, 6313.8194, 6311.9179, 6310.5844] +25-08-28 15:23:35 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 15:23:35 | D | - error = [ 6309.4177, 6307.6563, 6306.2315, 6305.2537, 6303.8747, 6303.2671, 6302.1266, 6301.5617, 6300.1422, 6299.0253] +25-08-28 15:23:35 | D | - best error = [ 6309.4177, 6307.6563, 6306.2315, 6305.2537, 6303.8747, 6303.2671, 6302.1266, 6301.5617, 6300.1422, 6299.0253] +25-08-28 15:23:37 | D | - iter = [ 30] +25-08-28 15:23:37 | D | - error = [ 6299.0774] +25-08-28 15:23:37 | D | - best error = [ 6299.0253] +25-08-28 15:23:37 | D | + Adding low-rank branches to single_transformer_blocks.14.proj_mlp +25-08-28 15:23:37 | D | - Calibrating low-rank branch for single_transformer_blocks.14.proj_out.linears.1.linear +25-08-28 15:23:37 | D | + w: sint4 +25-08-28 15:23:37 | D | + x: sint4 +25-08-28 15:23:37 | D | + y: None +25-08-28 15:23:37 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:23:37 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:23:37 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:23:42 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:24:08 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-28 15:24:08 | D | - error = [12971.1323, 12901.2136, 12867.9845, 12846.3860, 12838.1556, 12828.4060, 12819.9632, 12831.0449] +25-08-28 15:24:08 | D | - best error = [12971.1323, 12901.2136, 12867.9845, 12846.3860, 12838.1556, 12828.4060, 12819.9632, 12819.9632] +25-08-28 15:24:08 | D | + Adding low-rank branches to single_transformer_blocks.14.proj_out.linears.1.linear +25-08-28 15:24:26 | D | - Calibrating low-rank branches of block single_transformer_blocks.15 +25-08-28 15:24:26 | D | - Calibrating low-rank branch for single_transformer_blocks.15.attn.to_q, single_transformer_blocks.15.attn.to_k, single_transformer_blocks.15.attn.to_v +25-08-28 15:24:26 | D | + w: sint4 +25-08-28 15:24:26 | D | + x: sint4 +25-08-28 15:24:26 | D | + y: None +25-08-28 15:24:26 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:24:26 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:24:26 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:24:28 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:24:41 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-28 15:24:41 | D | - error = [30432.2702, 29805.7562, 29416.9475, 29161.0688, 28935.7523, 28938.5268] +25-08-28 15:24:41 | D | - best error = [30432.2702, 29805.7562, 29416.9475, 29161.0688, 28935.7523, 28935.7523] +25-08-28 15:24:41 | D | + Adding low-rank branches to single_transformer_blocks.15.attn.to_q, single_transformer_blocks.15.attn.to_k, single_transformer_blocks.15.attn.to_v +25-08-28 15:24:41 | D | - Calibrating low-rank branch for single_transformer_blocks.15.proj_out.linears.0 +25-08-28 15:24:41 | D | + w: sint4 +25-08-28 15:24:41 | D | + x: sint4 +25-08-28 15:24:41 | D | + y: None +25-08-28 15:24:41 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:24:41 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:24:41 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:24:42 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:24:57 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:24:57 | D | - error = [ 4292.9149, 4249.9840, 4219.2217, 4200.7147, 4188.0731, 4179.3428, 4168.6662, 4159.4057, 4156.4408, 4151.6999] +25-08-28 15:24:57 | D | - best error = [ 4292.9149, 4249.9840, 4219.2217, 4200.7147, 4188.0731, 4179.3428, 4168.6662, 4159.4057, 4156.4408, 4151.6999] +25-08-28 15:25:12 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:25:12 | D | - error = [ 4143.5572, 4139.2545, 4139.0968, 4136.3656, 4132.0820, 4127.6591, 4126.4677, 4122.8306, 4118.6146, 4116.7764] +25-08-28 15:25:12 | D | - best error = [ 4143.5572, 4139.2545, 4139.0968, 4136.3656, 4132.0820, 4127.6591, 4126.4677, 4122.8306, 4118.6146, 4116.7764] +25-08-28 15:25:21 | D | - iter = [ 20, 21, 22, 23, 24, 25] +25-08-28 15:25:21 | D | - error = [ 4115.7672, 4114.3080, 4111.9762, 4111.8886, 4111.3455, 4111.5639] +25-08-28 15:25:21 | D | - best error = [ 4115.7672, 4114.3080, 4111.9762, 4111.8886, 4111.3455, 4111.3455] +25-08-28 15:25:22 | D | + Adding low-rank branches to single_transformer_blocks.15.proj_out.linears.0 +25-08-28 15:25:22 | D | - Calibrating low-rank branch for single_transformer_blocks.15.proj_mlp +25-08-28 15:25:22 | D | + w: sint4 +25-08-28 15:25:22 | D | + x: sint4 +25-08-28 15:25:22 | D | + y: None +25-08-28 15:25:22 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:25:22 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:25:22 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:25:24 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:25:45 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:25:45 | D | - error = [ 6196.0403, 6159.1616, 6134.7215, 6116.1998, 6101.7479, 6091.4662, 6083.5240, 6076.4914, 6070.1189, 6065.0758] +25-08-28 15:25:45 | D | - best error = [ 6196.0403, 6159.1616, 6134.7215, 6116.1998, 6101.7479, 6091.4662, 6083.5240, 6076.4914, 6070.1189, 6065.0758] +25-08-28 15:26:07 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:26:07 | D | - error = [ 6060.2693, 6055.7688, 6052.8222, 6050.0274, 6046.7763, 6045.4810, 6043.3027, 6040.1980, 6038.5223, 6038.0470] +25-08-28 15:26:07 | D | - best error = [ 6060.2693, 6055.7688, 6052.8222, 6050.0274, 6046.7763, 6045.4810, 6043.3027, 6040.1980, 6038.5223, 6038.0470] +25-08-28 15:26:29 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 15:26:29 | D | - error = [ 6036.1206, 6034.6453, 6032.9436, 6032.0245, 6030.4706, 6029.6381, 6028.3859, 6027.7498, 6026.3790, 6025.4165] +25-08-28 15:26:29 | D | - best error = [ 6036.1206, 6034.6453, 6032.9436, 6032.0245, 6030.4706, 6029.6381, 6028.3859, 6027.7498, 6026.3790, 6025.4165] +25-08-28 15:26:47 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37] +25-08-28 15:26:47 | D | - error = [ 6024.7486, 6024.4588, 6023.3393, 6022.6562, 6022.3221, 6021.3789, 6020.5764, 6020.7800] +25-08-28 15:26:47 | D | - best error = [ 6024.7486, 6024.4588, 6023.3393, 6022.6562, 6022.3221, 6021.3789, 6020.5764, 6020.5764] +25-08-28 15:26:47 | D | + Adding low-rank branches to single_transformer_blocks.15.proj_mlp +25-08-28 15:26:47 | D | - Calibrating low-rank branch for single_transformer_blocks.15.proj_out.linears.1.linear +25-08-28 15:26:47 | D | + w: sint4 +25-08-28 15:26:47 | D | + x: sint4 +25-08-28 15:26:47 | D | + y: None +25-08-28 15:26:47 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:26:47 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:26:47 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:26:52 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:27:30 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-28 15:27:30 | D | - error = [12997.7469, 12975.0203, 12934.7736, 12927.2735, 12896.8412, 12884.7314, 12871.1547, 12862.9110, 12867.8278] +25-08-28 15:27:30 | D | - best error = [12997.7469, 12975.0203, 12934.7736, 12927.2735, 12896.8412, 12884.7314, 12871.1547, 12862.9110, 12862.9110] +25-08-28 15:27:30 | D | + Adding low-rank branches to single_transformer_blocks.15.proj_out.linears.1.linear +25-08-28 15:27:49 | D | - Calibrating low-rank branches of block single_transformer_blocks.16 +25-08-28 15:27:49 | D | - Calibrating low-rank branch for single_transformer_blocks.16.attn.to_q, single_transformer_blocks.16.attn.to_k, single_transformer_blocks.16.attn.to_v +25-08-28 15:27:49 | D | + w: sint4 +25-08-28 15:27:49 | D | + x: sint4 +25-08-28 15:27:49 | D | + y: None +25-08-28 15:27:49 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:27:49 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:27:49 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:27:50 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:28:03 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-28 15:28:03 | D | - error = [29505.4034, 28953.3904, 28691.2469, 28511.0303, 28341.7747, 28349.8114] +25-08-28 15:28:03 | D | - best error = [29505.4034, 28953.3904, 28691.2469, 28511.0303, 28341.7747, 28341.7747] +25-08-28 15:28:04 | D | + Adding low-rank branches to single_transformer_blocks.16.attn.to_q, single_transformer_blocks.16.attn.to_k, single_transformer_blocks.16.attn.to_v +25-08-28 15:28:04 | D | - Calibrating low-rank branch for single_transformer_blocks.16.proj_out.linears.0 +25-08-28 15:28:04 | D | + w: sint4 +25-08-28 15:28:04 | D | + x: sint4 +25-08-28 15:28:04 | D | + y: None +25-08-28 15:28:04 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:28:04 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:28:04 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:28:05 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:28:20 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:28:20 | D | - error = [ 4694.0415, 4641.9946, 4611.1111, 4594.7646, 4579.7410, 4564.3911, 4556.6870, 4549.5151, 4544.7018, 4537.2124] +25-08-28 15:28:20 | D | - best error = [ 4694.0415, 4641.9946, 4611.1111, 4594.7646, 4579.7410, 4564.3911, 4556.6870, 4549.5151, 4544.7018, 4537.2124] +25-08-28 15:28:30 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16] +25-08-28 15:28:30 | D | - error = [ 4532.8124, 4530.8494, 4525.6957, 4521.8932, 4517.4803, 4513.1406, 4513.1583] +25-08-28 15:28:30 | D | - best error = [ 4532.8124, 4530.8494, 4525.6957, 4521.8932, 4517.4803, 4513.1406, 4513.1406] +25-08-28 15:28:30 | D | + Adding low-rank branches to single_transformer_blocks.16.proj_out.linears.0 +25-08-28 15:28:31 | D | - Calibrating low-rank branch for single_transformer_blocks.16.proj_mlp +25-08-28 15:28:31 | D | + w: sint4 +25-08-28 15:28:31 | D | + x: sint4 +25-08-28 15:28:31 | D | + y: None +25-08-28 15:28:31 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:28:31 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:28:31 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:28:32 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:28:55 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:28:55 | D | - error = [ 5709.5252, 5685.1297, 5669.6173, 5659.3712, 5651.1861, 5644.6522, 5639.0664, 5634.2254, 5630.6916, 5627.0440] +25-08-28 15:28:55 | D | - best error = [ 5709.5252, 5685.1297, 5669.6173, 5659.3712, 5651.1861, 5644.6522, 5639.0664, 5634.2254, 5630.6916, 5627.0440] +25-08-28 15:29:17 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:29:17 | D | - error = [ 5623.9687, 5621.7645, 5619.5946, 5617.8724, 5615.3288, 5613.0523, 5610.8074, 5609.4055, 5608.0583, 5606.7188] +25-08-28 15:29:18 | D | - best error = [ 5623.9687, 5621.7645, 5619.5946, 5617.8724, 5615.3288, 5613.0523, 5610.8074, 5609.4055, 5608.0583, 5606.7188] +25-08-28 15:29:40 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 15:29:40 | D | - error = [ 5605.3130, 5604.6645, 5604.1130, 5602.8218, 5601.9245, 5600.9485, 5600.3064, 5599.4503, 5598.3473, 5597.5825] +25-08-28 15:29:40 | D | - best error = [ 5605.3130, 5604.6645, 5604.1130, 5602.8218, 5601.9245, 5600.9485, 5600.3064, 5599.4503, 5598.3473, 5597.5825] +25-08-28 15:30:02 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-28 15:30:02 | D | - error = [ 5597.1823, 5596.6767, 5596.2784, 5596.0654, 5595.8165, 5594.8798, 5594.5813, 5594.1502, 5593.6884, 5593.3727] +25-08-28 15:30:02 | D | - best error = [ 5597.1823, 5596.6767, 5596.2784, 5596.0654, 5595.8165, 5594.8798, 5594.5813, 5594.1502, 5593.6884, 5593.3727] +25-08-28 15:30:24 | D | - iter = [ 40, 41, 42, 43, 44, 45, 46, 47, 48, 49] +25-08-28 15:30:24 | D | - error = [ 5592.5720, 5592.1434, 5592.0485, 5591.5354, 5591.3973, 5591.3585, 5591.1214, 5590.9654, 5590.8185, 5590.4701] +25-08-28 15:30:24 | D | - best error = [ 5592.5720, 5592.1434, 5592.0485, 5591.5354, 5591.3973, 5591.3585, 5591.1214, 5590.9654, 5590.8185, 5590.4701] +25-08-28 15:30:26 | D | - iter = [ 50] +25-08-28 15:30:26 | D | - error = [ 5590.5291] +25-08-28 15:30:26 | D | - best error = [ 5590.4701] +25-08-28 15:30:26 | D | + Adding low-rank branches to single_transformer_blocks.16.proj_mlp +25-08-28 15:30:26 | D | - Calibrating low-rank branch for single_transformer_blocks.16.proj_out.linears.1.linear +25-08-28 15:30:26 | D | + w: sint4 +25-08-28 15:30:26 | D | + x: sint4 +25-08-28 15:30:26 | D | + y: None +25-08-28 15:30:26 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:30:26 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:30:26 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:30:31 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:31:05 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:31:05 | D | - error = [12196.6008, 12163.9349, 12129.0692, 12107.6842, 12085.2849, 12081.8158, 12049.0889, 12033.8549, 12029.5410, 12012.4988] +25-08-28 15:31:05 | D | - best error = [12196.6008, 12163.9349, 12129.0692, 12107.6842, 12085.2849, 12081.8158, 12049.0889, 12033.8549, 12029.5410, 12012.4988] +25-08-28 15:31:08 | D | - iter = [ 10] +25-08-28 15:31:08 | D | - error = [12026.6218] +25-08-28 15:31:08 | D | - best error = [12012.4988] +25-08-28 15:31:09 | D | + Adding low-rank branches to single_transformer_blocks.16.proj_out.linears.1.linear +25-08-28 15:31:27 | D | - Calibrating low-rank branches of block single_transformer_blocks.17 +25-08-28 15:31:27 | D | - Calibrating low-rank branch for single_transformer_blocks.17.attn.to_q, single_transformer_blocks.17.attn.to_k, single_transformer_blocks.17.attn.to_v +25-08-28 15:31:27 | D | + w: sint4 +25-08-28 15:31:27 | D | + x: sint4 +25-08-28 15:31:27 | D | + y: None +25-08-28 15:31:27 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:31:27 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:31:27 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:31:28 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:31:49 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-28 15:31:49 | D | - error = [37600.0562, 36363.5052, 35715.8206, 35268.1613, 35184.9932, 34910.9661, 34794.1549, 34592.1702, 34615.2466] +25-08-28 15:31:49 | D | - best error = [37600.0562, 36363.5052, 35715.8206, 35268.1613, 35184.9932, 34910.9661, 34794.1549, 34592.1702, 34592.1702] +25-08-28 15:31:50 | D | + Adding low-rank branches to single_transformer_blocks.17.attn.to_q, single_transformer_blocks.17.attn.to_k, single_transformer_blocks.17.attn.to_v +25-08-28 15:31:50 | D | - Calibrating low-rank branch for single_transformer_blocks.17.proj_out.linears.0 +25-08-28 15:31:50 | D | + w: sint4 +25-08-28 15:31:50 | D | + x: sint4 +25-08-28 15:31:50 | D | + y: None +25-08-28 15:31:50 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:31:50 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:31:50 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:31:52 | D | + finished calculating the original outputs, ram usage: 17.1 +25-08-28 15:32:07 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:32:07 | D | - error = [ 4295.5673, 4246.9062, 4216.9185, 4193.9270, 4178.7161, 4169.1510, 4158.1020, 4149.4260, 4143.4290, 4139.5009] +25-08-28 15:32:07 | D | - best error = [ 4295.5673, 4246.9062, 4216.9185, 4193.9270, 4178.7161, 4169.1510, 4158.1020, 4149.4260, 4143.4290, 4139.5009] +25-08-28 15:32:23 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:32:23 | D | - error = [ 4131.1371, 4125.8917, 4121.8998, 4119.4344, 4117.5889, 4113.7350, 4112.3580, 4109.5868, 4107.5336, 4106.5616] +25-08-28 15:32:23 | D | - best error = [ 4131.1371, 4125.8917, 4121.8998, 4119.4344, 4117.5889, 4113.7350, 4112.3580, 4109.5868, 4107.5336, 4106.5616] +25-08-28 15:32:33 | D | - iter = [ 20, 21, 22, 23, 24, 25] +25-08-28 15:32:33 | D | - error = [ 4105.4001, 4105.3867, 4103.5129, 4101.4583, 4100.7699, 4101.1364] +25-08-28 15:32:33 | D | - best error = [ 4105.4001, 4105.3867, 4103.5129, 4101.4583, 4100.7699, 4100.7699] +25-08-28 15:32:33 | D | + Adding low-rank branches to single_transformer_blocks.17.proj_out.linears.0 +25-08-28 15:32:33 | D | - Calibrating low-rank branch for single_transformer_blocks.17.proj_mlp +25-08-28 15:32:33 | D | + w: sint4 +25-08-28 15:32:33 | D | + x: sint4 +25-08-28 15:32:33 | D | + y: None +25-08-28 15:32:33 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:32:33 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:32:33 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:32:35 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:32:57 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:32:57 | D | - error = [ 6234.7577, 6188.0048, 6159.4195, 6138.6011, 6122.3443, 6110.9246, 6101.1405, 6092.9615, 6086.5261, 6080.4460] +25-08-28 15:32:57 | D | - best error = [ 6234.7577, 6188.0048, 6159.4195, 6138.6011, 6122.3443, 6110.9246, 6101.1405, 6092.9615, 6086.5261, 6080.4460] +25-08-28 15:33:19 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:33:19 | D | - error = [ 6075.2353, 6070.6487, 6066.0082, 6063.8278, 6059.2264, 6056.9408, 6055.0762, 6054.3707, 6051.9104, 6049.8496] +25-08-28 15:33:19 | D | - best error = [ 6075.2353, 6070.6487, 6066.0082, 6063.8278, 6059.2264, 6056.9408, 6055.0762, 6054.3707, 6051.9104, 6049.8496] +25-08-28 15:33:41 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 15:33:41 | D | - error = [ 6048.2470, 6046.2574, 6043.9692, 6042.9880, 6041.8782, 6040.5265, 6039.6973, 6038.5482, 6037.8506, 6037.6419] +25-08-28 15:33:41 | D | - best error = [ 6048.2470, 6046.2574, 6043.9692, 6042.9880, 6041.8782, 6040.5265, 6039.6973, 6038.5482, 6037.8506, 6037.6419] +25-08-28 15:34:03 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-28 15:34:03 | D | - error = [ 6036.5636, 6035.8622, 6034.7984, 6033.8963, 6032.9578, 6032.1827, 6031.8614, 6030.6457, 6029.8507, 6029.3930] +25-08-28 15:34:03 | D | - best error = [ 6036.5636, 6035.8622, 6034.7984, 6033.8963, 6032.9578, 6032.1827, 6031.8614, 6030.6457, 6029.8507, 6029.3930] +25-08-28 15:34:25 | D | - iter = [ 40, 41, 42, 43, 44, 45, 46, 47, 48, 49] +25-08-28 15:34:25 | D | - error = [ 6029.2683, 6028.3560, 6027.8550, 6027.4618, 6026.6253, 6026.5174, 6026.1497, 6025.7733, 6024.9441, 6024.6036] +25-08-28 15:34:25 | D | - best error = [ 6029.2683, 6028.3560, 6027.8550, 6027.4618, 6026.6253, 6026.5174, 6026.1497, 6025.7733, 6024.9441, 6024.6036] +25-08-28 15:34:28 | D | - iter = [ 50] +25-08-28 15:34:28 | D | - error = [ 6024.7148] +25-08-28 15:34:28 | D | - best error = [ 6024.6036] +25-08-28 15:34:28 | D | + Adding low-rank branches to single_transformer_blocks.17.proj_mlp +25-08-28 15:34:28 | D | - Calibrating low-rank branch for single_transformer_blocks.17.proj_out.linears.1.linear +25-08-28 15:34:28 | D | + w: sint4 +25-08-28 15:34:28 | D | + x: sint4 +25-08-28 15:34:28 | D | + y: None +25-08-28 15:34:28 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:34:28 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:34:28 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:34:33 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:34:59 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-28 15:34:59 | D | - error = [12516.4545, 12483.3669, 12451.7102, 12428.2034, 12399.5593, 12374.9153, 12364.7454, 12368.3441] +25-08-28 15:34:59 | D | - best error = [12516.4545, 12483.3669, 12451.7102, 12428.2034, 12399.5593, 12374.9153, 12364.7454, 12364.7454] +25-08-28 15:35:00 | D | + Adding low-rank branches to single_transformer_blocks.17.proj_out.linears.1.linear +25-08-28 15:35:18 | D | - Calibrating low-rank branches of block single_transformer_blocks.18 +25-08-28 15:35:18 | D | - Calibrating low-rank branch for single_transformer_blocks.18.attn.to_q, single_transformer_blocks.18.attn.to_k, single_transformer_blocks.18.attn.to_v +25-08-28 15:35:18 | D | + w: sint4 +25-08-28 15:35:18 | D | + x: sint4 +25-08-28 15:35:18 | D | + y: None +25-08-28 15:35:18 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:35:18 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:35:18 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:35:19 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:35:33 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-28 15:35:33 | D | - error = [30088.3116, 29287.6088, 29141.9708, 28830.7047, 28623.6881, 28710.5634] +25-08-28 15:35:33 | D | - best error = [30088.3116, 29287.6088, 29141.9708, 28830.7047, 28623.6881, 28623.6881] +25-08-28 15:35:33 | D | + Adding low-rank branches to single_transformer_blocks.18.attn.to_q, single_transformer_blocks.18.attn.to_k, single_transformer_blocks.18.attn.to_v +25-08-28 15:35:33 | D | - Calibrating low-rank branch for single_transformer_blocks.18.proj_out.linears.0 +25-08-28 15:35:33 | D | + w: sint4 +25-08-28 15:35:33 | D | + x: sint4 +25-08-28 15:35:33 | D | + y: None +25-08-28 15:35:33 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:35:33 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:35:33 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:35:34 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:35:49 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:35:49 | D | - error = [ 3656.0511, 3611.4825, 3585.8412, 3566.6825, 3551.7106, 3538.6808, 3531.4113, 3527.0980, 3521.9678, 3516.8184] +25-08-28 15:35:49 | D | - best error = [ 3656.0511, 3611.4825, 3585.8412, 3566.6825, 3551.7106, 3538.6808, 3531.4113, 3527.0980, 3521.9678, 3516.8184] +25-08-28 15:36:04 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:36:04 | D | - error = [ 3514.6744, 3512.7636, 3512.5387, 3509.8715, 3506.6075, 3503.1011, 3499.0659, 3498.0482, 3497.1085, 3493.8679] +25-08-28 15:36:04 | D | - best error = [ 3514.6744, 3512.7636, 3512.5387, 3509.8715, 3506.6075, 3503.1011, 3499.0659, 3498.0482, 3497.1085, 3493.8679] +25-08-28 15:36:05 | D | - iter = [ 20] +25-08-28 15:36:05 | D | - error = [ 3494.4284] +25-08-28 15:36:05 | D | - best error = [ 3493.8679] +25-08-28 15:36:06 | D | + Adding low-rank branches to single_transformer_blocks.18.proj_out.linears.0 +25-08-28 15:36:06 | D | - Calibrating low-rank branch for single_transformer_blocks.18.proj_mlp +25-08-28 15:36:06 | D | + w: sint4 +25-08-28 15:36:06 | D | + x: sint4 +25-08-28 15:36:06 | D | + y: None +25-08-28 15:36:06 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:36:06 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:36:06 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:36:08 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:36:29 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:36:29 | D | - error = [ 5798.9502, 5766.9330, 5746.0184, 5731.9916, 5722.5451, 5713.1937, 5705.2835, 5699.2217, 5694.5621, 5690.7167] +25-08-28 15:36:29 | D | - best error = [ 5798.9502, 5766.9330, 5746.0184, 5731.9916, 5722.5451, 5713.1937, 5705.2835, 5699.2217, 5694.5621, 5690.7167] +25-08-28 15:36:51 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:36:51 | D | - error = [ 5686.5007, 5683.4829, 5681.0247, 5678.4729, 5676.1910, 5673.7307, 5671.6763, 5669.9092, 5668.4012, 5666.8174] +25-08-28 15:36:51 | D | - best error = [ 5686.5007, 5683.4829, 5681.0247, 5678.4729, 5676.1910, 5673.7307, 5671.6763, 5669.9092, 5668.4012, 5666.8174] +25-08-28 15:37:13 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 15:37:13 | D | - error = [ 5665.0465, 5664.0249, 5662.5457, 5662.2250, 5660.5968, 5660.2071, 5659.2925, 5658.5744, 5657.9056, 5657.1046] +25-08-28 15:37:13 | D | - best error = [ 5665.0465, 5664.0249, 5662.5457, 5662.2250, 5660.5968, 5660.2071, 5659.2925, 5658.5744, 5657.9056, 5657.1046] +25-08-28 15:37:35 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-28 15:37:35 | D | - error = [ 5656.9783, 5656.3377, 5655.8299, 5655.0912, 5654.5039, 5654.2931, 5653.5614, 5653.1466, 5652.1522, 5651.2960] +25-08-28 15:37:35 | D | - best error = [ 5656.9783, 5656.3377, 5655.8299, 5655.0912, 5654.5039, 5654.2931, 5653.5614, 5653.1466, 5652.1522, 5651.2960] +25-08-28 15:37:40 | D | - iter = [ 40, 41] +25-08-28 15:37:40 | D | - error = [ 5651.2335, 5651.4442] +25-08-28 15:37:40 | D | - best error = [ 5651.2335, 5651.2335] +25-08-28 15:37:40 | D | + Adding low-rank branches to single_transformer_blocks.18.proj_mlp +25-08-28 15:37:40 | D | - Calibrating low-rank branch for single_transformer_blocks.18.proj_out.linears.1.linear +25-08-28 15:37:40 | D | + w: sint4 +25-08-28 15:37:40 | D | + x: sint4 +25-08-28 15:37:40 | D | + y: None +25-08-28 15:37:40 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:37:40 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:37:40 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:37:45 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:38:11 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-28 15:38:11 | D | - error = [11654.5229, 11641.7655, 11608.9738, 11604.9741, 11579.8691, 11577.9981, 11572.0930, 11574.6658] +25-08-28 15:38:11 | D | - best error = [11654.5229, 11641.7655, 11608.9738, 11604.9741, 11579.8691, 11577.9981, 11572.0930, 11572.0930] +25-08-28 15:38:11 | D | + Adding low-rank branches to single_transformer_blocks.18.proj_out.linears.1.linear +25-08-28 15:38:29 | D | - Calibrating low-rank branches of block single_transformer_blocks.19 +25-08-28 15:38:29 | D | - Calibrating low-rank branch for single_transformer_blocks.19.attn.to_q, single_transformer_blocks.19.attn.to_k, single_transformer_blocks.19.attn.to_v +25-08-28 15:38:29 | D | + w: sint4 +25-08-28 15:38:29 | D | + x: sint4 +25-08-28 15:38:29 | D | + y: None +25-08-28 15:38:29 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:38:29 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:38:29 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:38:31 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:38:48 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-28 15:38:48 | D | - error = [30517.4258, 30172.9246, 29308.8645, 29209.0846, 28971.5668, 28662.1724, 28435.0443, 28466.3282] +25-08-28 15:38:48 | D | - best error = [30517.4258, 30172.9246, 29308.8645, 29209.0846, 28971.5668, 28662.1724, 28435.0443, 28435.0443] +25-08-28 15:38:49 | D | + Adding low-rank branches to single_transformer_blocks.19.attn.to_q, single_transformer_blocks.19.attn.to_k, single_transformer_blocks.19.attn.to_v +25-08-28 15:38:49 | D | - Calibrating low-rank branch for single_transformer_blocks.19.proj_out.linears.0 +25-08-28 15:38:49 | D | + w: sint4 +25-08-28 15:38:49 | D | + x: sint4 +25-08-28 15:38:49 | D | + y: None +25-08-28 15:38:49 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:38:49 | D | + finished parsing calibration arguments, ram usage: 15.6 +25-08-28 15:38:49 | D | + finished resetting calibrator, ram usage: 15.6 +25-08-28 15:38:50 | D | + finished calculating the original outputs, ram usage: 15.6 +25-08-28 15:39:05 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:39:05 | D | - error = [ 3603.6497, 3571.9856, 3550.4557, 3537.1968, 3526.3755, 3514.3823, 3508.0413, 3503.0116, 3496.1237, 3494.5878] +25-08-28 15:39:05 | D | - best error = [ 3603.6497, 3571.9856, 3550.4557, 3537.1968, 3526.3755, 3514.3823, 3508.0413, 3503.0116, 3496.1237, 3494.5878] +25-08-28 15:39:15 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16] +25-08-28 15:39:15 | D | - error = [ 3490.6611, 3485.6410, 3481.3105, 3477.3736, 3474.6016, 3472.9221, 3473.4507] +25-08-28 15:39:15 | D | - best error = [ 3490.6611, 3485.6410, 3481.3105, 3477.3736, 3474.6016, 3472.9221, 3472.9221] +25-08-28 15:39:15 | D | + Adding low-rank branches to single_transformer_blocks.19.proj_out.linears.0 +25-08-28 15:39:16 | D | - Calibrating low-rank branch for single_transformer_blocks.19.proj_mlp +25-08-28 15:39:16 | D | + w: sint4 +25-08-28 15:39:16 | D | + x: sint4 +25-08-28 15:39:16 | D | + y: None +25-08-28 15:39:16 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:39:16 | D | + finished parsing calibration arguments, ram usage: 18.3 +25-08-28 15:39:16 | D | + finished resetting calibrator, ram usage: 18.3 +25-08-28 15:39:18 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:39:39 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:39:39 | D | - error = [ 5688.0835, 5663.3464, 5649.0815, 5636.2289, 5627.7914, 5621.2787, 5614.8760, 5610.1397, 5606.7984, 5603.1593] +25-08-28 15:39:39 | D | - best error = [ 5688.0835, 5663.3464, 5649.0815, 5636.2289, 5627.7914, 5621.2787, 5614.8760, 5610.1397, 5606.7984, 5603.1593] +25-08-28 15:40:01 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:40:01 | D | - error = [ 5600.7010, 5597.7765, 5595.3726, 5594.1001, 5591.9430, 5590.1828, 5588.6211, 5587.1184, 5586.3938, 5584.9637] +25-08-28 15:40:01 | D | - best error = [ 5600.7010, 5597.7765, 5595.3726, 5594.1001, 5591.9430, 5590.1828, 5588.6211, 5587.1184, 5586.3938, 5584.9637] +25-08-28 15:40:23 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 15:40:23 | D | - error = [ 5584.0444, 5582.9841, 5582.2936, 5581.2377, 5580.4385, 5579.7437, 5579.1971, 5577.8888, 5576.8848, 5576.1727] +25-08-28 15:40:23 | D | - best error = [ 5584.0444, 5582.9841, 5582.2936, 5581.2377, 5580.4385, 5579.7437, 5579.1971, 5577.8888, 5576.8848, 5576.1727] +25-08-28 15:40:43 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38] +25-08-28 15:40:43 | D | - error = [ 5575.7265, 5575.1373, 5574.4460, 5574.1884, 5574.1622, 5573.7097, 5573.1526, 5573.0672, 5573.1621] +25-08-28 15:40:43 | D | - best error = [ 5575.7265, 5575.1373, 5574.4460, 5574.1884, 5574.1622, 5573.7097, 5573.1526, 5573.0672, 5573.0672] +25-08-28 15:40:43 | D | + Adding low-rank branches to single_transformer_blocks.19.proj_mlp +25-08-28 15:40:43 | D | - Calibrating low-rank branch for single_transformer_blocks.19.proj_out.linears.1.linear +25-08-28 15:40:43 | D | + w: sint4 +25-08-28 15:40:43 | D | + x: sint4 +25-08-28 15:40:43 | D | + y: None +25-08-28 15:40:43 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:40:43 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:40:43 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:40:48 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:41:21 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:41:21 | D | - error = [10346.6779, 10330.9566, 10301.5059, 10293.3602, 10274.7298, 10263.5631, 10244.0226, 10243.7563, 10229.7003, 10228.1674] +25-08-28 15:41:21 | D | - best error = [10346.6779, 10330.9566, 10301.5059, 10293.3602, 10274.7298, 10263.5631, 10244.0226, 10243.7563, 10229.7003, 10228.1674] +25-08-28 15:41:31 | D | - iter = [ 10, 11, 12] +25-08-28 15:41:31 | D | - error = [10223.8365, 10218.6888, 10231.4703] +25-08-28 15:41:31 | D | - best error = [10223.8365, 10218.6888, 10218.6888] +25-08-28 15:41:31 | D | + Adding low-rank branches to single_transformer_blocks.19.proj_out.linears.1.linear +25-08-28 15:41:49 | D | - Calibrating low-rank branches of block single_transformer_blocks.20 +25-08-28 15:41:49 | D | - Calibrating low-rank branch for single_transformer_blocks.20.attn.to_q, single_transformer_blocks.20.attn.to_k, single_transformer_blocks.20.attn.to_v +25-08-28 15:41:49 | D | + w: sint4 +25-08-28 15:41:49 | D | + x: sint4 +25-08-28 15:41:49 | D | + y: None +25-08-28 15:41:49 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:41:49 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:41:49 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:41:51 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:42:06 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 15:42:06 | D | - error = [31735.9526, 30926.3776, 30634.1624, 30410.7265, 30138.5653, 29990.4582, 30007.4263] +25-08-28 15:42:06 | D | - best error = [31735.9526, 30926.3776, 30634.1624, 30410.7265, 30138.5653, 29990.4582, 29990.4582] +25-08-28 15:42:06 | D | + Adding low-rank branches to single_transformer_blocks.20.attn.to_q, single_transformer_blocks.20.attn.to_k, single_transformer_blocks.20.attn.to_v +25-08-28 15:42:06 | D | - Calibrating low-rank branch for single_transformer_blocks.20.proj_out.linears.0 +25-08-28 15:42:06 | D | + w: sint4 +25-08-28 15:42:06 | D | + x: sint4 +25-08-28 15:42:06 | D | + y: None +25-08-28 15:42:06 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:42:06 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:42:06 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:42:07 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:42:23 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:42:23 | D | - error = [ 3764.9571, 3724.1493, 3701.5675, 3688.5523, 3670.3080, 3661.9492, 3652.3356, 3647.3693, 3645.1229, 3640.5327] +25-08-28 15:42:23 | D | - best error = [ 3764.9571, 3724.1493, 3701.5675, 3688.5523, 3670.3080, 3661.9492, 3652.3356, 3647.3693, 3645.1229, 3640.5327] +25-08-28 15:42:37 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:42:37 | D | - error = [ 3638.9064, 3636.8877, 3633.5539, 3632.1801, 3630.7691, 3630.0306, 3626.7946, 3626.0776, 3622.6806, 3623.1126] +25-08-28 15:42:37 | D | - best error = [ 3638.9064, 3636.8877, 3633.5539, 3632.1801, 3630.7691, 3630.0306, 3626.7946, 3626.0776, 3622.6806, 3622.6806] +25-08-28 15:42:37 | D | + Adding low-rank branches to single_transformer_blocks.20.proj_out.linears.0 +25-08-28 15:42:38 | D | - Calibrating low-rank branch for single_transformer_blocks.20.proj_mlp +25-08-28 15:42:38 | D | + w: sint4 +25-08-28 15:42:38 | D | + x: sint4 +25-08-28 15:42:38 | D | + y: None +25-08-28 15:42:38 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:42:38 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:42:38 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:42:39 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:43:02 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:43:02 | D | - error = [ 5999.8747, 5961.9148, 5936.5680, 5918.8948, 5905.6302, 5896.3904, 5886.8796, 5879.8646, 5874.3425, 5868.9679] +25-08-28 15:43:02 | D | - best error = [ 5999.8747, 5961.9148, 5936.5680, 5918.8948, 5905.6302, 5896.3904, 5886.8796, 5879.8646, 5874.3425, 5868.9679] +25-08-28 15:43:25 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:43:25 | D | - error = [ 5864.7424, 5860.4386, 5858.2250, 5854.3495, 5851.5882, 5850.0797, 5847.3392, 5846.8778, 5845.2689, 5842.6994] +25-08-28 15:43:25 | D | - best error = [ 5864.7424, 5860.4386, 5858.2250, 5854.3495, 5851.5882, 5850.0797, 5847.3392, 5846.8778, 5845.2689, 5842.6994] +25-08-28 15:43:47 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 15:43:47 | D | - error = [ 5840.3007, 5838.9444, 5838.2035, 5837.4207, 5836.3985, 5834.8810, 5834.2506, 5833.3140, 5833.1460, 5831.7099] +25-08-28 15:43:47 | D | - best error = [ 5840.3007, 5838.9444, 5838.2035, 5837.4207, 5836.3985, 5834.8810, 5834.2506, 5833.3140, 5833.1460, 5831.7099] +25-08-28 15:44:09 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-28 15:44:09 | D | - error = [ 5831.4321, 5830.8994, 5829.8220, 5829.8041, 5829.0908, 5828.3885, 5827.1434, 5826.4799, 5825.8373, 5825.2203] +25-08-28 15:44:09 | D | - best error = [ 5831.4321, 5830.8994, 5829.8220, 5829.8041, 5829.0908, 5828.3885, 5827.1434, 5826.4799, 5825.8373, 5825.2203] +25-08-28 15:44:31 | D | - iter = [ 40, 41, 42, 43, 44, 45, 46, 47, 48, 49] +25-08-28 15:44:31 | D | - error = [ 5824.7606, 5824.1054, 5823.6999, 5823.4124, 5823.2675, 5823.0899, 5822.3178, 5821.8543, 5821.8416, 5821.3827] +25-08-28 15:44:31 | D | - best error = [ 5824.7606, 5824.1054, 5823.6999, 5823.4124, 5823.2675, 5823.0899, 5822.3178, 5821.8543, 5821.8416, 5821.3827] +25-08-28 15:44:53 | D | - iter = [ 50, 51, 52, 53, 54, 55, 56, 57, 58, 59] +25-08-28 15:44:53 | D | - error = [ 5820.8487, 5820.6436, 5820.3955, 5819.8106, 5819.6711, 5819.5605, 5819.2024, 5818.7188, 5818.2771, 5818.5531] +25-08-28 15:44:53 | D | - best error = [ 5820.8487, 5820.6436, 5820.3955, 5819.8106, 5819.6711, 5819.5605, 5819.2024, 5818.7188, 5818.2771, 5818.2771] +25-08-28 15:44:53 | D | + Adding low-rank branches to single_transformer_blocks.20.proj_mlp +25-08-28 15:44:54 | D | - Calibrating low-rank branch for single_transformer_blocks.20.proj_out.linears.1.linear +25-08-28 15:44:54 | D | + w: sint4 +25-08-28 15:44:54 | D | + x: sint4 +25-08-28 15:44:54 | D | + y: None +25-08-28 15:44:54 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:44:54 | D | + finished parsing calibration arguments, ram usage: 18.2 +25-08-28 15:44:54 | D | + finished resetting calibrator, ram usage: 18.2 +25-08-28 15:44:59 | D | + finished calculating the original outputs, ram usage: 18.2 +25-08-28 15:45:28 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-28 15:45:28 | D | - error = [10488.2185, 10461.6435, 10460.9606, 10446.7637, 10425.7600, 10423.1700, 10408.4489, 10398.5507, 10404.0884] +25-08-28 15:45:28 | D | - best error = [10488.2185, 10461.6435, 10460.9606, 10446.7637, 10425.7600, 10423.1700, 10408.4489, 10398.5507, 10398.5507] +25-08-28 15:45:28 | D | + Adding low-rank branches to single_transformer_blocks.20.proj_out.linears.1.linear +25-08-28 15:45:47 | D | - Calibrating low-rank branches of block single_transformer_blocks.21 +25-08-28 15:45:47 | D | - Calibrating low-rank branch for single_transformer_blocks.21.attn.to_q, single_transformer_blocks.21.attn.to_k, single_transformer_blocks.21.attn.to_v +25-08-28 15:45:47 | D | + w: sint4 +25-08-28 15:45:47 | D | + x: sint4 +25-08-28 15:45:47 | D | + y: None +25-08-28 15:45:47 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:45:47 | D | + finished parsing calibration arguments, ram usage: 17.9 +25-08-28 15:45:47 | D | + finished resetting calibrator, ram usage: 17.9 +25-08-28 15:45:48 | D | + finished calculating the original outputs, ram usage: 17.9 +25-08-28 15:46:01 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-28 15:46:01 | D | - error = [30275.0125, 29392.6707, 28827.8710, 28623.2040, 28429.0601, 28986.5315] +25-08-28 15:46:01 | D | - best error = [30275.0125, 29392.6707, 28827.8710, 28623.2040, 28429.0601, 28429.0601] +25-08-28 15:46:01 | D | + Adding low-rank branches to single_transformer_blocks.21.attn.to_q, single_transformer_blocks.21.attn.to_k, single_transformer_blocks.21.attn.to_v +25-08-28 15:46:02 | D | - Calibrating low-rank branch for single_transformer_blocks.21.proj_out.linears.0 +25-08-28 15:46:02 | D | + w: sint4 +25-08-28 15:46:02 | D | + x: sint4 +25-08-28 15:46:02 | D | + y: None +25-08-28 15:46:02 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:46:02 | D | + finished parsing calibration arguments, ram usage: 17.9 +25-08-28 15:46:02 | D | + finished resetting calibrator, ram usage: 17.9 +25-08-28 15:46:02 | D | + finished calculating the original outputs, ram usage: 17.9 +25-08-28 15:46:18 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:46:18 | D | - error = [ 3288.9193, 3260.0057, 3240.7437, 3223.3008, 3211.0000, 3201.9631, 3197.4967, 3193.0108, 3186.7780, 3183.2389] +25-08-28 15:46:18 | D | - best error = [ 3288.9193, 3260.0057, 3240.7437, 3223.3008, 3211.0000, 3201.9631, 3197.4967, 3193.0108, 3186.7780, 3183.2389] +25-08-28 15:46:25 | D | - iter = [ 10, 11, 12, 13, 14] +25-08-28 15:46:25 | D | - error = [ 3178.4276, 3173.6055, 3172.5148, 3171.5714, 3171.7232] +25-08-28 15:46:25 | D | - best error = [ 3178.4276, 3173.6055, 3172.5148, 3171.5714, 3171.5714] +25-08-28 15:46:26 | D | + Adding low-rank branches to single_transformer_blocks.21.proj_out.linears.0 +25-08-28 15:46:26 | D | - Calibrating low-rank branch for single_transformer_blocks.21.proj_mlp +25-08-28 15:46:26 | D | + w: sint4 +25-08-28 15:46:26 | D | + x: sint4 +25-08-28 15:46:26 | D | + y: None +25-08-28 15:46:26 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:46:26 | D | + finished parsing calibration arguments, ram usage: 17.9 +25-08-28 15:46:26 | D | + finished resetting calibrator, ram usage: 17.9 +25-08-28 15:46:28 | D | + finished calculating the original outputs, ram usage: 17.9 +25-08-28 15:46:49 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:46:49 | D | - error = [ 5622.1405, 5592.6858, 5575.0885, 5562.3980, 5552.3836, 5543.6239, 5536.6324, 5531.3215, 5526.5155, 5521.7638] +25-08-28 15:46:49 | D | - best error = [ 5622.1405, 5592.6858, 5575.0885, 5562.3980, 5552.3836, 5543.6239, 5536.6324, 5531.3215, 5526.5155, 5521.7638] +25-08-28 15:47:11 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:47:11 | D | - error = [ 5518.6361, 5515.7128, 5513.8331, 5511.0865, 5508.8990, 5507.1475, 5504.9836, 5503.0768, 5501.3149, 5500.2885] +25-08-28 15:47:11 | D | - best error = [ 5518.6361, 5515.7128, 5513.8331, 5511.0865, 5508.8990, 5507.1475, 5504.9836, 5503.0768, 5501.3149, 5500.2885] +25-08-28 15:47:33 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 15:47:33 | D | - error = [ 5498.7828, 5497.9363, 5496.3957, 5495.2510, 5494.4568, 5493.5903, 5492.9768, 5492.4332, 5490.8200, 5490.9220] +25-08-28 15:47:33 | D | - best error = [ 5498.7828, 5497.9363, 5496.3957, 5495.2510, 5494.4568, 5493.5903, 5492.9768, 5492.4332, 5490.8200, 5490.8200] +25-08-28 15:47:34 | D | + Adding low-rank branches to single_transformer_blocks.21.proj_mlp +25-08-28 15:47:34 | D | - Calibrating low-rank branch for single_transformer_blocks.21.proj_out.linears.1.linear +25-08-28 15:47:34 | D | + w: sint4 +25-08-28 15:47:34 | D | + x: sint4 +25-08-28 15:47:34 | D | + y: None +25-08-28 15:47:34 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:47:34 | D | + finished parsing calibration arguments, ram usage: 15.2 +25-08-28 15:47:34 | D | + finished resetting calibrator, ram usage: 15.2 +25-08-28 15:47:39 | D | + finished calculating the original outputs, ram usage: 15.2 +25-08-28 15:48:02 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 15:48:02 | D | - error = [ 9860.7586, 9815.1218, 9792.8570, 9777.9110, 9756.4002, 9743.0463, 9747.8182] +25-08-28 15:48:02 | D | - best error = [ 9860.7586, 9815.1218, 9792.8570, 9777.9110, 9756.4002, 9743.0463, 9743.0463] +25-08-28 15:48:02 | D | + Adding low-rank branches to single_transformer_blocks.21.proj_out.linears.1.linear +25-08-28 15:48:20 | D | - Calibrating low-rank branches of block single_transformer_blocks.22 +25-08-28 15:48:20 | D | - Calibrating low-rank branch for single_transformer_blocks.22.attn.to_q, single_transformer_blocks.22.attn.to_k, single_transformer_blocks.22.attn.to_v +25-08-28 15:48:20 | D | + w: sint4 +25-08-28 15:48:20 | D | + x: sint4 +25-08-28 15:48:20 | D | + y: None +25-08-28 15:48:20 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:48:20 | D | + finished parsing calibration arguments, ram usage: 15.2 +25-08-28 15:48:20 | D | + finished resetting calibrator, ram usage: 15.2 +25-08-28 15:48:22 | D | + finished calculating the original outputs, ram usage: 15.2 +25-08-28 15:48:39 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-28 15:48:39 | D | - error = [37534.2574, 36885.1016, 36434.4452, 36149.6941, 36024.3402, 35694.1138, 35563.3573, 35602.9302] +25-08-28 15:48:39 | D | - best error = [37534.2574, 36885.1016, 36434.4452, 36149.6941, 36024.3402, 35694.1138, 35563.3573, 35563.3573] +25-08-28 15:48:40 | D | + Adding low-rank branches to single_transformer_blocks.22.attn.to_q, single_transformer_blocks.22.attn.to_k, single_transformer_blocks.22.attn.to_v +25-08-28 15:48:40 | D | - Calibrating low-rank branch for single_transformer_blocks.22.proj_out.linears.0 +25-08-28 15:48:40 | D | + w: sint4 +25-08-28 15:48:40 | D | + x: sint4 +25-08-28 15:48:40 | D | + y: None +25-08-28 15:48:40 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:48:40 | D | + finished parsing calibration arguments, ram usage: 15.2 +25-08-28 15:48:40 | D | + finished resetting calibrator, ram usage: 15.2 +25-08-28 15:48:41 | D | + finished calculating the original outputs, ram usage: 15.2 +25-08-28 15:48:56 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:48:56 | D | - error = [ 3528.1696, 3498.4208, 3477.1386, 3458.7762, 3445.6953, 3437.3965, 3427.6107, 3422.0853, 3417.7689, 3410.7141] +25-08-28 15:48:56 | D | - best error = [ 3528.1696, 3498.4208, 3477.1386, 3458.7762, 3445.6953, 3437.3965, 3427.6107, 3422.0853, 3417.7689, 3410.7141] +25-08-28 15:49:11 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:49:11 | D | - error = [ 3407.4545, 3403.7092, 3402.0732, 3401.8101, 3399.6870, 3397.2174, 3394.9814, 3391.0413, 3387.8943, 3386.6029] +25-08-28 15:49:11 | D | - best error = [ 3407.4545, 3403.7092, 3402.0732, 3401.8101, 3399.6870, 3397.2174, 3394.9814, 3391.0413, 3387.8943, 3386.6029] +25-08-28 15:49:17 | D | - iter = [ 20, 21, 22, 23] +25-08-28 15:49:17 | D | - error = [ 3385.5635, 3382.4367, 3382.1622, 3382.2396] +25-08-28 15:49:17 | D | - best error = [ 3385.5635, 3382.4367, 3382.1622, 3382.1622] +25-08-28 15:49:17 | D | + Adding low-rank branches to single_transformer_blocks.22.proj_out.linears.0 +25-08-28 15:49:17 | D | - Calibrating low-rank branch for single_transformer_blocks.22.proj_mlp +25-08-28 15:49:17 | D | + w: sint4 +25-08-28 15:49:17 | D | + x: sint4 +25-08-28 15:49:17 | D | + y: None +25-08-28 15:49:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:49:17 | D | + finished parsing calibration arguments, ram usage: 15.2 +25-08-28 15:49:17 | D | + finished resetting calibrator, ram usage: 15.2 +25-08-28 15:49:19 | D | + finished calculating the original outputs, ram usage: 15.2 +25-08-28 15:49:41 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:49:41 | D | - error = [ 5647.9903, 5618.9346, 5599.5461, 5587.2473, 5577.5729, 5569.2538, 5562.3725, 5557.0890, 5552.5214, 5548.4782] +25-08-28 15:49:41 | D | - best error = [ 5647.9903, 5618.9346, 5599.5461, 5587.2473, 5577.5729, 5569.2538, 5562.3725, 5557.0890, 5552.5214, 5548.4782] +25-08-28 15:50:03 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:50:03 | D | - error = [ 5544.8538, 5542.0316, 5538.8950, 5536.1828, 5533.9888, 5532.0687, 5530.4437, 5528.8574, 5527.7218, 5526.3406] +25-08-28 15:50:03 | D | - best error = [ 5544.8538, 5542.0316, 5538.8950, 5536.1828, 5533.9888, 5532.0687, 5530.4437, 5528.8574, 5527.7218, 5526.3406] +25-08-28 15:50:25 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 15:50:25 | D | - error = [ 5524.6205, 5523.7297, 5522.8543, 5522.3604, 5521.8767, 5521.1714, 5520.7304, 5519.7437, 5519.3809, 5519.0334] +25-08-28 15:50:25 | D | - best error = [ 5524.6205, 5523.7297, 5522.8543, 5522.3604, 5521.8767, 5521.1714, 5520.7304, 5519.7437, 5519.3809, 5519.0334] +25-08-28 15:50:47 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-28 15:50:47 | D | - error = [ 5518.5571, 5518.0613, 5518.0428, 5517.5466, 5516.4554, 5515.8111, 5515.2057, 5514.6285, 5514.3315, 5513.6563] +25-08-28 15:50:47 | D | - best error = [ 5518.5571, 5518.0613, 5518.0428, 5517.5466, 5516.4554, 5515.8111, 5515.2057, 5514.6285, 5514.3315, 5513.6563] +25-08-28 15:51:09 | D | - iter = [ 40, 41, 42, 43, 44, 45, 46, 47, 48, 49] +25-08-28 15:51:09 | D | - error = [ 5513.1873, 5512.7568, 5512.2669, 5512.1700, 5511.8374, 5511.6145, 5511.0890, 5511.0663, 5510.8988, 5510.8769] +25-08-28 15:51:09 | D | - best error = [ 5513.1873, 5512.7568, 5512.2669, 5512.1700, 5511.8374, 5511.6145, 5511.0890, 5511.0663, 5510.8988, 5510.8769] +25-08-28 15:51:24 | D | - iter = [ 50, 51, 52, 53, 54, 55, 56] +25-08-28 15:51:24 | D | - error = [ 5510.5878, 5510.4232, 5510.1735, 5509.8098, 5509.2829, 5508.9616, 5508.9643] +25-08-28 15:51:24 | D | - best error = [ 5510.5878, 5510.4232, 5510.1735, 5509.8098, 5509.2829, 5508.9616, 5508.9616] +25-08-28 15:51:24 | D | + Adding low-rank branches to single_transformer_blocks.22.proj_mlp +25-08-28 15:51:24 | D | - Calibrating low-rank branch for single_transformer_blocks.22.proj_out.linears.1.linear +25-08-28 15:51:24 | D | + w: sint4 +25-08-28 15:51:24 | D | + x: sint4 +25-08-28 15:51:24 | D | + y: None +25-08-28 15:51:24 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:51:24 | D | + finished parsing calibration arguments, ram usage: 15.3 +25-08-28 15:51:24 | D | + finished resetting calibrator, ram usage: 15.3 +25-08-28 15:51:29 | D | + finished calculating the original outputs, ram usage: 15.3 +25-08-28 15:51:49 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-28 15:51:49 | D | - error = [10683.8143, 10648.2720, 10622.9141, 10597.1392, 10582.4799, 10583.4229] +25-08-28 15:51:49 | D | - best error = [10683.8143, 10648.2720, 10622.9141, 10597.1392, 10582.4799, 10582.4799] +25-08-28 15:51:50 | D | + Adding low-rank branches to single_transformer_blocks.22.proj_out.linears.1.linear +25-08-28 15:52:08 | D | - Calibrating low-rank branches of block single_transformer_blocks.23 +25-08-28 15:52:08 | D | - Calibrating low-rank branch for single_transformer_blocks.23.attn.to_q, single_transformer_blocks.23.attn.to_k, single_transformer_blocks.23.attn.to_v +25-08-28 15:52:08 | D | + w: sint4 +25-08-28 15:52:08 | D | + x: sint4 +25-08-28 15:52:08 | D | + y: None +25-08-28 15:52:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:52:08 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-28 15:52:08 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-28 15:52:09 | D | + finished calculating the original outputs, ram usage: 12.7 +25-08-28 15:52:20 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-28 15:52:20 | D | - error = [37396.6587, 36176.8651, 36066.1259, 35732.7556, 35748.5655] +25-08-28 15:52:20 | D | - best error = [37396.6587, 36176.8651, 36066.1259, 35732.7556, 35732.7556] +25-08-28 15:52:20 | D | + Adding low-rank branches to single_transformer_blocks.23.attn.to_q, single_transformer_blocks.23.attn.to_k, single_transformer_blocks.23.attn.to_v +25-08-28 15:52:21 | D | - Calibrating low-rank branch for single_transformer_blocks.23.proj_out.linears.0 +25-08-28 15:52:21 | D | + w: sint4 +25-08-28 15:52:21 | D | + x: sint4 +25-08-28 15:52:21 | D | + y: None +25-08-28 15:52:21 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:52:21 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-28 15:52:21 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-28 15:52:21 | D | + finished calculating the original outputs, ram usage: 12.7 +25-08-28 15:52:37 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:52:37 | D | - error = [ 3686.5493, 3649.8594, 3628.3482, 3608.5846, 3595.8871, 3588.4288, 3581.7184, 3575.4247, 3571.5226, 3567.1175] +25-08-28 15:52:37 | D | - best error = [ 3686.5493, 3649.8594, 3628.3482, 3608.5846, 3595.8871, 3588.4288, 3581.7184, 3575.4247, 3571.5226, 3567.1175] +25-08-28 15:52:44 | D | - iter = [ 10, 11, 12, 13, 14] +25-08-28 15:52:44 | D | - error = [ 3562.7613, 3560.1883, 3555.1698, 3551.3673, 3554.3717] +25-08-28 15:52:44 | D | - best error = [ 3562.7613, 3560.1883, 3555.1698, 3551.3673, 3551.3673] +25-08-28 15:52:44 | D | + Adding low-rank branches to single_transformer_blocks.23.proj_out.linears.0 +25-08-28 15:52:44 | D | - Calibrating low-rank branch for single_transformer_blocks.23.proj_mlp +25-08-28 15:52:44 | D | + w: sint4 +25-08-28 15:52:44 | D | + x: sint4 +25-08-28 15:52:44 | D | + y: None +25-08-28 15:52:44 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:52:44 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-28 15:52:44 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-28 15:52:46 | D | + finished calculating the original outputs, ram usage: 12.7 +25-08-28 15:53:07 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:53:07 | D | - error = [ 5468.3038, 5438.4588, 5419.6747, 5406.4450, 5395.9609, 5387.3196, 5380.8981, 5375.1724, 5370.3929, 5366.5087] +25-08-28 15:53:07 | D | - best error = [ 5468.3038, 5438.4588, 5419.6747, 5406.4450, 5395.9609, 5387.3196, 5380.8981, 5375.1724, 5370.3929, 5366.5087] +25-08-28 15:53:29 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:53:29 | D | - error = [ 5362.9391, 5359.4992, 5357.2045, 5354.7779, 5352.5609, 5351.2427, 5349.9024, 5347.8108, 5346.8144, 5345.0998] +25-08-28 15:53:29 | D | - best error = [ 5362.9391, 5359.4992, 5357.2045, 5354.7779, 5352.5609, 5351.2427, 5349.9024, 5347.8108, 5346.8144, 5345.0998] +25-08-28 15:53:50 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 15:53:50 | D | - error = [ 5343.6965, 5342.8891, 5341.6606, 5340.4472, 5339.1655, 5338.7206, 5338.2308, 5337.8306, 5337.1065, 5336.3802] +25-08-28 15:53:50 | D | - best error = [ 5343.6965, 5342.8891, 5341.6606, 5340.4472, 5339.1655, 5338.7206, 5338.2308, 5337.8306, 5337.1065, 5336.3802] +25-08-28 15:54:12 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-28 15:54:12 | D | - error = [ 5335.2365, 5334.2208, 5333.4117, 5333.2759, 5332.9864, 5331.9704, 5331.1473, 5330.9515, 5330.6975, 5330.5366] +25-08-28 15:54:12 | D | - best error = [ 5335.2365, 5334.2208, 5333.4117, 5333.2759, 5332.9864, 5331.9704, 5331.1473, 5330.9515, 5330.6975, 5330.5366] +25-08-28 15:54:21 | D | - iter = [ 40, 41, 42, 43] +25-08-28 15:54:21 | D | - error = [ 5329.6760, 5328.8156, 5328.4437, 5328.6963] +25-08-28 15:54:21 | D | - best error = [ 5329.6760, 5328.8156, 5328.4437, 5328.4437] +25-08-28 15:54:21 | D | + Adding low-rank branches to single_transformer_blocks.23.proj_mlp +25-08-28 15:54:22 | D | - Calibrating low-rank branch for single_transformer_blocks.23.proj_out.linears.1.linear +25-08-28 15:54:22 | D | + w: sint4 +25-08-28 15:54:22 | D | + x: sint4 +25-08-28 15:54:22 | D | + y: None +25-08-28 15:54:22 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:54:22 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-28 15:54:22 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-28 15:54:27 | D | + finished calculating the original outputs, ram usage: 12.7 +25-08-28 15:54:59 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:54:59 | D | - error = [ 9327.4734, 9312.6757, 9280.1595, 9273.5495, 9268.5869, 9268.1770, 9266.1090, 9253.5814, 9234.7847, 9233.0433] +25-08-28 15:54:59 | D | - best error = [ 9327.4734, 9312.6757, 9280.1595, 9273.5495, 9268.5869, 9268.1770, 9266.1090, 9253.5814, 9234.7847, 9233.0433] +25-08-28 15:55:02 | D | - iter = [ 10] +25-08-28 15:55:02 | D | - error = [ 9236.9298] +25-08-28 15:55:02 | D | - best error = [ 9233.0433] +25-08-28 15:55:02 | D | + Adding low-rank branches to single_transformer_blocks.23.proj_out.linears.1.linear +25-08-28 15:55:21 | D | - Calibrating low-rank branches of block single_transformer_blocks.24 +25-08-28 15:55:21 | D | - Calibrating low-rank branch for single_transformer_blocks.24.attn.to_q, single_transformer_blocks.24.attn.to_k, single_transformer_blocks.24.attn.to_v +25-08-28 15:55:21 | D | + w: sint4 +25-08-28 15:55:21 | D | + x: sint4 +25-08-28 15:55:21 | D | + y: None +25-08-28 15:55:21 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:55:21 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-28 15:55:21 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-28 15:55:22 | D | + finished calculating the original outputs, ram usage: 12.7 +25-08-28 15:55:42 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-28 15:55:42 | D | - error = [33689.4600, 32904.8451, 32611.2314, 32450.2031, 32274.8378, 32185.4844, 32175.7494, 32038.6966, 32101.1780] +25-08-28 15:55:42 | D | - best error = [33689.4600, 32904.8451, 32611.2314, 32450.2031, 32274.8378, 32185.4844, 32175.7494, 32038.6966, 32038.6966] +25-08-28 15:55:42 | D | + Adding low-rank branches to single_transformer_blocks.24.attn.to_q, single_transformer_blocks.24.attn.to_k, single_transformer_blocks.24.attn.to_v +25-08-28 15:55:42 | D | - Calibrating low-rank branch for single_transformer_blocks.24.proj_out.linears.0 +25-08-28 15:55:42 | D | + w: sint4 +25-08-28 15:55:42 | D | + x: sint4 +25-08-28 15:55:42 | D | + y: None +25-08-28 15:55:42 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:55:42 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-28 15:55:42 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-28 15:55:44 | D | + finished calculating the original outputs, ram usage: 12.7 +25-08-28 15:55:58 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:55:58 | D | - error = [ 3563.2404, 3520.6903, 3492.9971, 3475.8641, 3465.2879, 3454.3251, 3447.0614, 3440.0465, 3438.1162, 3431.1967] +25-08-28 15:55:58 | D | - best error = [ 3563.2404, 3520.6903, 3492.9971, 3475.8641, 3465.2879, 3454.3251, 3447.0614, 3440.0465, 3438.1162, 3431.1967] +25-08-28 15:56:01 | D | - iter = [ 10, 11] +25-08-28 15:56:01 | D | - error = [ 3426.8383, 3427.6927] +25-08-28 15:56:01 | D | - best error = [ 3426.8383, 3426.8383] +25-08-28 15:56:02 | D | + Adding low-rank branches to single_transformer_blocks.24.proj_out.linears.0 +25-08-28 15:56:02 | D | - Calibrating low-rank branch for single_transformer_blocks.24.proj_mlp +25-08-28 15:56:02 | D | + w: sint4 +25-08-28 15:56:02 | D | + x: sint4 +25-08-28 15:56:02 | D | + y: None +25-08-28 15:56:02 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:56:02 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-28 15:56:02 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-28 15:56:04 | D | + finished calculating the original outputs, ram usage: 12.7 +25-08-28 15:56:25 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:56:25 | D | - error = [ 5574.2415, 5550.6237, 5534.0025, 5523.0919, 5515.3745, 5509.0021, 5503.7940, 5498.8652, 5495.7382, 5492.7940] +25-08-28 15:56:25 | D | - best error = [ 5574.2415, 5550.6237, 5534.0025, 5523.0919, 5515.3745, 5509.0021, 5503.7940, 5498.8652, 5495.7382, 5492.7940] +25-08-28 15:56:47 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 15:56:47 | D | - error = [ 5489.8942, 5487.8681, 5486.4989, 5484.3014, 5482.0817, 5480.2473, 5478.7144, 5477.2500, 5476.1591, 5474.5551] +25-08-28 15:56:47 | D | - best error = [ 5489.8942, 5487.8681, 5486.4989, 5484.3014, 5482.0817, 5480.2473, 5478.7144, 5477.2500, 5476.1591, 5474.5551] +25-08-28 15:57:09 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 15:57:09 | D | - error = [ 5473.2087, 5471.7748, 5470.9264, 5470.1155, 5469.7060, 5469.2809, 5468.6752, 5468.2635, 5467.7831, 5467.0606] +25-08-28 15:57:09 | D | - best error = [ 5473.2087, 5471.7748, 5470.9264, 5470.1155, 5469.7060, 5469.2809, 5468.6752, 5468.2635, 5467.7831, 5467.0606] +25-08-28 15:57:31 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-28 15:57:31 | D | - error = [ 5466.6207, 5466.5053, 5466.1762, 5465.4442, 5465.1273, 5464.8791, 5464.1967, 5463.7705, 5463.1207, 5462.8037] +25-08-28 15:57:31 | D | - best error = [ 5466.6207, 5466.5053, 5466.1762, 5465.4442, 5465.1273, 5464.8791, 5464.1967, 5463.7705, 5463.1207, 5462.8037] +25-08-28 15:57:40 | D | - iter = [ 40, 41, 42, 43] +25-08-28 15:57:40 | D | - error = [ 5462.4055, 5462.0793, 5461.8691, 5461.9544] +25-08-28 15:57:40 | D | - best error = [ 5462.4055, 5462.0793, 5461.8691, 5461.8691] +25-08-28 15:57:40 | D | + Adding low-rank branches to single_transformer_blocks.24.proj_mlp +25-08-28 15:57:40 | D | - Calibrating low-rank branch for single_transformer_blocks.24.proj_out.linears.1.linear +25-08-28 15:57:40 | D | + w: sint4 +25-08-28 15:57:40 | D | + x: sint4 +25-08-28 15:57:40 | D | + y: None +25-08-28 15:57:40 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:57:40 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-28 15:57:40 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-28 15:57:45 | D | + finished calculating the original outputs, ram usage: 12.7 +25-08-28 15:57:59 | D | - iter = [ 0, 1, 2, 3] +25-08-28 15:57:59 | D | - error = [10130.3583, 10103.5621, 10080.1833, 10082.0169] +25-08-28 15:57:59 | D | - best error = [10130.3583, 10103.5621, 10080.1833, 10080.1833] +25-08-28 15:58:00 | D | + Adding low-rank branches to single_transformer_blocks.24.proj_out.linears.1.linear +25-08-28 15:58:18 | D | - Calibrating low-rank branches of block single_transformer_blocks.25 +25-08-28 15:58:18 | D | - Calibrating low-rank branch for single_transformer_blocks.25.attn.to_q, single_transformer_blocks.25.attn.to_k, single_transformer_blocks.25.attn.to_v +25-08-28 15:58:18 | D | + w: sint4 +25-08-28 15:58:18 | D | + x: sint4 +25-08-28 15:58:18 | D | + y: None +25-08-28 15:58:18 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:58:18 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-28 15:58:18 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-28 15:58:19 | D | + finished calculating the original outputs, ram usage: 12.7 +25-08-28 15:58:44 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:58:44 | D | - error = [48066.9187, 47170.3229, 46675.1127, 46369.8578, 46226.8881, 46058.8257, 45963.7060, 45794.0391, 45688.3327, 45469.6410] +25-08-28 15:58:44 | D | - best error = [48066.9187, 47170.3229, 46675.1127, 46369.8578, 46226.8881, 46058.8257, 45963.7060, 45794.0391, 45688.3327, 45469.6410] +25-08-28 15:58:46 | D | - iter = [ 10] +25-08-28 15:58:46 | D | - error = [45490.2905] +25-08-28 15:58:46 | D | - best error = [45469.6410] +25-08-28 15:58:46 | D | + Adding low-rank branches to single_transformer_blocks.25.attn.to_q, single_transformer_blocks.25.attn.to_k, single_transformer_blocks.25.attn.to_v +25-08-28 15:58:46 | D | - Calibrating low-rank branch for single_transformer_blocks.25.proj_out.linears.0 +25-08-28 15:58:46 | D | + w: sint4 +25-08-28 15:58:46 | D | + x: sint4 +25-08-28 15:58:46 | D | + y: None +25-08-28 15:58:46 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:58:46 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-28 15:58:46 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-28 15:58:48 | D | + finished calculating the original outputs, ram usage: 12.7 +25-08-28 15:59:03 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:59:03 | D | - error = [ 3956.4506, 3918.5483, 3895.8760, 3881.1603, 3863.1839, 3852.0650, 3844.9257, 3834.0446, 3827.5789, 3823.9321] +25-08-28 15:59:03 | D | - best error = [ 3956.4506, 3918.5483, 3895.8760, 3881.1603, 3863.1839, 3852.0650, 3844.9257, 3834.0446, 3827.5789, 3823.9321] +25-08-28 15:59:17 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18] +25-08-28 15:59:17 | D | - error = [ 3814.4327, 3811.9004, 3806.2379, 3804.6205, 3804.2984, 3802.7827, 3801.8184, 3800.2104, 3801.3216] +25-08-28 15:59:17 | D | - best error = [ 3814.4327, 3811.9004, 3806.2379, 3804.6205, 3804.2984, 3802.7827, 3801.8184, 3800.2104, 3800.2104] +25-08-28 15:59:18 | D | + Adding low-rank branches to single_transformer_blocks.25.proj_out.linears.0 +25-08-28 15:59:18 | D | - Calibrating low-rank branch for single_transformer_blocks.25.proj_mlp +25-08-28 15:59:18 | D | + w: sint4 +25-08-28 15:59:18 | D | + x: sint4 +25-08-28 15:59:18 | D | + y: None +25-08-28 15:59:18 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 15:59:18 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-28 15:59:18 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-28 15:59:20 | D | + finished calculating the original outputs, ram usage: 12.7 +25-08-28 15:59:42 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 15:59:42 | D | - error = [ 5264.7239, 5234.9632, 5216.6769, 5202.7846, 5192.5083, 5185.8689, 5178.9765, 5173.3969, 5168.4752, 5164.2939] +25-08-28 15:59:42 | D | - best error = [ 5264.7239, 5234.9632, 5216.6769, 5202.7846, 5192.5083, 5185.8689, 5178.9765, 5173.3969, 5168.4752, 5164.2939] +25-08-28 16:00:04 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 16:00:04 | D | - error = [ 5160.8189, 5159.0860, 5157.3272, 5154.6053, 5152.7598, 5151.5011, 5150.0458, 5148.1610, 5146.5923, 5144.8775] +25-08-28 16:00:04 | D | - best error = [ 5160.8189, 5159.0860, 5157.3272, 5154.6053, 5152.7598, 5151.5011, 5150.0458, 5148.1610, 5146.5923, 5144.8775] +25-08-28 16:00:25 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 16:00:25 | D | - error = [ 5143.6306, 5141.9203, 5141.2020, 5140.0844, 5138.5881, 5137.9221, 5137.3879, 5137.3114, 5136.2296, 5135.5253] +25-08-28 16:00:25 | D | - best error = [ 5143.6306, 5141.9203, 5141.2020, 5140.0844, 5138.5881, 5137.9221, 5137.3879, 5137.3114, 5136.2296, 5135.5253] +25-08-28 16:00:47 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-28 16:00:47 | D | - error = [ 5135.4844, 5134.9580, 5133.6872, 5132.8246, 5132.2206, 5131.8613, 5131.2955, 5130.0579, 5129.6261, 5129.4815] +25-08-28 16:00:47 | D | - best error = [ 5135.4844, 5134.9580, 5133.6872, 5132.8246, 5132.2206, 5131.8613, 5131.2955, 5130.0579, 5129.6261, 5129.4815] +25-08-28 16:00:56 | D | - iter = [ 40, 41, 42, 43] +25-08-28 16:00:56 | D | - error = [ 5128.3348, 5128.1558, 5127.5150, 5127.7223] +25-08-28 16:00:56 | D | - best error = [ 5128.3348, 5128.1558, 5127.5150, 5127.5150] +25-08-28 16:00:57 | D | + Adding low-rank branches to single_transformer_blocks.25.proj_mlp +25-08-28 16:00:57 | D | - Calibrating low-rank branch for single_transformer_blocks.25.proj_out.linears.1.linear +25-08-28 16:00:57 | D | + w: sint4 +25-08-28 16:00:57 | D | + x: sint4 +25-08-28 16:00:57 | D | + y: None +25-08-28 16:00:57 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:00:57 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-28 16:00:57 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-28 16:01:02 | D | + finished calculating the original outputs, ram usage: 12.7 +25-08-28 16:01:22 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-28 16:01:22 | D | - error = [10045.6534, 10023.0087, 9989.1441, 9962.9558, 9947.1540, 9949.9781] +25-08-28 16:01:22 | D | - best error = [10045.6534, 10023.0087, 9989.1441, 9962.9558, 9947.1540, 9947.1540] +25-08-28 16:01:22 | D | + Adding low-rank branches to single_transformer_blocks.25.proj_out.linears.1.linear +25-08-28 16:01:41 | D | - Calibrating low-rank branches of block single_transformer_blocks.26 +25-08-28 16:01:41 | D | - Calibrating low-rank branch for single_transformer_blocks.26.attn.to_q, single_transformer_blocks.26.attn.to_k, single_transformer_blocks.26.attn.to_v +25-08-28 16:01:41 | D | + w: sint4 +25-08-28 16:01:41 | D | + x: sint4 +25-08-28 16:01:41 | D | + y: None +25-08-28 16:01:41 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:01:41 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-28 16:01:41 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-28 16:01:42 | D | + finished calculating the original outputs, ram usage: 12.7 +25-08-28 16:02:04 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:02:04 | D | - error = [40097.8004, 39065.3180, 38717.7173, 38654.6904, 38544.0731, 38098.0226, 38006.2361, 37918.6987, 37851.1341, 37684.7437] +25-08-28 16:02:04 | D | - best error = [40097.8004, 39065.3180, 38717.7173, 38654.6904, 38544.0731, 38098.0226, 38006.2361, 37918.6987, 37851.1341, 37684.7437] +25-08-28 16:02:06 | D | - iter = [ 10] +25-08-28 16:02:06 | D | - error = [37702.9977] +25-08-28 16:02:06 | D | - best error = [37684.7437] +25-08-28 16:02:06 | D | + Adding low-rank branches to single_transformer_blocks.26.attn.to_q, single_transformer_blocks.26.attn.to_k, single_transformer_blocks.26.attn.to_v +25-08-28 16:02:07 | D | - Calibrating low-rank branch for single_transformer_blocks.26.proj_out.linears.0 +25-08-28 16:02:07 | D | + w: sint4 +25-08-28 16:02:07 | D | + x: sint4 +25-08-28 16:02:07 | D | + y: None +25-08-28 16:02:07 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:02:07 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-28 16:02:07 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-28 16:02:08 | D | + finished calculating the original outputs, ram usage: 12.7 +25-08-28 16:02:23 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:02:23 | D | - error = [ 3865.5657, 3808.3408, 3772.3148, 3751.8130, 3732.7937, 3715.1292, 3706.1028, 3698.7703, 3694.5579, 3692.0436] +25-08-28 16:02:23 | D | - best error = [ 3865.5657, 3808.3408, 3772.3148, 3751.8130, 3732.7937, 3715.1292, 3706.1028, 3698.7703, 3694.5579, 3692.0436] +25-08-28 16:02:30 | D | - iter = [ 10, 11, 12, 13, 14] +25-08-28 16:02:30 | D | - error = [ 3683.1307, 3681.3068, 3680.8282, 3679.0298, 3679.2969] +25-08-28 16:02:30 | D | - best error = [ 3683.1307, 3681.3068, 3680.8282, 3679.0298, 3679.0298] +25-08-28 16:02:30 | D | + Adding low-rank branches to single_transformer_blocks.26.proj_out.linears.0 +25-08-28 16:02:31 | D | - Calibrating low-rank branch for single_transformer_blocks.26.proj_mlp +25-08-28 16:02:31 | D | + w: sint4 +25-08-28 16:02:31 | D | + x: sint4 +25-08-28 16:02:31 | D | + y: None +25-08-28 16:02:31 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:02:31 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-28 16:02:31 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-28 16:02:33 | D | + finished calculating the original outputs, ram usage: 12.7 +25-08-28 16:02:54 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:02:54 | D | - error = [ 5251.1831, 5220.1806, 5200.8999, 5186.6561, 5177.8694, 5170.2646, 5163.8761, 5158.8487, 5154.4297, 5151.3820] +25-08-28 16:02:54 | D | - best error = [ 5251.1831, 5220.1806, 5200.8999, 5186.6561, 5177.8694, 5170.2646, 5163.8761, 5158.8487, 5154.4297, 5151.3820] +25-08-28 16:03:16 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 16:03:16 | D | - error = [ 5147.4753, 5144.7087, 5142.1035, 5140.0521, 5138.5774, 5136.5971, 5134.3742, 5133.2528, 5131.9224, 5130.5150] +25-08-28 16:03:16 | D | - best error = [ 5147.4753, 5144.7087, 5142.1035, 5140.0521, 5138.5774, 5136.5971, 5134.3742, 5133.2528, 5131.9224, 5130.5150] +25-08-28 16:03:38 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 16:03:38 | D | - error = [ 5128.4881, 5128.2110, 5127.3923, 5126.5450, 5125.5552, 5124.7819, 5123.6628, 5122.8719, 5122.1166, 5121.8415] +25-08-28 16:03:38 | D | - best error = [ 5128.4881, 5128.2110, 5127.3923, 5126.5450, 5125.5552, 5124.7819, 5123.6628, 5122.8719, 5122.1166, 5121.8415] +25-08-28 16:04:00 | D | - iter = [ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39] +25-08-28 16:04:00 | D | - error = [ 5121.6411, 5121.4974, 5121.4320, 5120.6509, 5120.5334, 5119.7490, 5119.5982, 5119.3093, 5119.0226, 5118.5396] +25-08-28 16:04:00 | D | - best error = [ 5121.6411, 5121.4974, 5121.4320, 5120.6509, 5120.5334, 5119.7490, 5119.5982, 5119.3093, 5119.0226, 5118.5396] +25-08-28 16:04:04 | D | - iter = [ 40, 41] +25-08-28 16:04:04 | D | - error = [ 5118.2065, 5118.3142] +25-08-28 16:04:04 | D | - best error = [ 5118.2065, 5118.2065] +25-08-28 16:04:05 | D | + Adding low-rank branches to single_transformer_blocks.26.proj_mlp +25-08-28 16:04:05 | D | - Calibrating low-rank branch for single_transformer_blocks.26.proj_out.linears.1.linear +25-08-28 16:04:05 | D | + w: sint4 +25-08-28 16:04:05 | D | + x: sint4 +25-08-28 16:04:05 | D | + y: None +25-08-28 16:04:05 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:04:05 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-28 16:04:05 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-28 16:04:10 | D | + finished calculating the original outputs, ram usage: 12.7 +25-08-28 16:04:22 | D | - iter = [ 0, 1, 2, 3] +25-08-28 16:04:22 | D | - error = [10300.0171, 10276.2717, 10251.0175, 10264.8036] +25-08-28 16:04:22 | D | - best error = [10300.0171, 10276.2717, 10251.0175, 10251.0175] +25-08-28 16:04:23 | D | + Adding low-rank branches to single_transformer_blocks.26.proj_out.linears.1.linear +25-08-28 16:04:41 | D | - Calibrating low-rank branches of block single_transformer_blocks.27 +25-08-28 16:04:41 | D | - Calibrating low-rank branch for single_transformer_blocks.27.attn.to_q, single_transformer_blocks.27.attn.to_k, single_transformer_blocks.27.attn.to_v +25-08-28 16:04:41 | D | + w: sint4 +25-08-28 16:04:41 | D | + x: sint4 +25-08-28 16:04:41 | D | + y: None +25-08-28 16:04:41 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:04:41 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-28 16:04:41 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-28 16:04:42 | D | + finished calculating the original outputs, ram usage: 12.7 +25-08-28 16:04:57 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 16:04:57 | D | - error = [61189.3596, 59377.7332, 58857.7480, 58406.1958, 58222.0808, 58055.2388, 58199.1549] +25-08-28 16:04:57 | D | - best error = [61189.3596, 59377.7332, 58857.7480, 58406.1958, 58222.0808, 58055.2388, 58055.2388] +25-08-28 16:04:58 | D | + Adding low-rank branches to single_transformer_blocks.27.attn.to_q, single_transformer_blocks.27.attn.to_k, single_transformer_blocks.27.attn.to_v +25-08-28 16:04:58 | D | - Calibrating low-rank branch for single_transformer_blocks.27.proj_out.linears.0 +25-08-28 16:04:58 | D | + w: sint4 +25-08-28 16:04:58 | D | + x: sint4 +25-08-28 16:04:58 | D | + y: None +25-08-28 16:04:58 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:04:58 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-28 16:04:58 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-28 16:04:59 | D | + finished calculating the original outputs, ram usage: 12.7 +25-08-28 16:05:14 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:05:14 | D | - error = [ 2958.3642, 2904.2619, 2872.9606, 2854.5002, 2842.2609, 2836.9945, 2827.5527, 2822.9011, 2817.6968, 2815.7699] +25-08-28 16:05:14 | D | - best error = [ 2958.3642, 2904.2619, 2872.9606, 2854.5002, 2842.2609, 2836.9945, 2827.5527, 2822.9011, 2817.6968, 2815.7699] +25-08-28 16:05:22 | D | - iter = [ 10, 11, 12, 13, 14] +25-08-28 16:05:22 | D | - error = [ 2813.1327, 2811.5046, 2808.7234, 2807.5324, 2807.7890] +25-08-28 16:05:22 | D | - best error = [ 2813.1327, 2811.5046, 2808.7234, 2807.5324, 2807.5324] +25-08-28 16:05:22 | D | + Adding low-rank branches to single_transformer_blocks.27.proj_out.linears.0 +25-08-28 16:05:22 | D | - Calibrating low-rank branch for single_transformer_blocks.27.proj_mlp +25-08-28 16:05:22 | D | + w: sint4 +25-08-28 16:05:22 | D | + x: sint4 +25-08-28 16:05:22 | D | + y: None +25-08-28 16:05:22 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:05:22 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-28 16:05:22 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-28 16:05:24 | D | + finished calculating the original outputs, ram usage: 12.7 +25-08-28 16:05:45 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:05:45 | D | - error = [ 5221.9546, 5195.5797, 5176.9721, 5162.5649, 5153.3389, 5145.1770, 5139.1517, 5134.2616, 5130.3076, 5125.5687] +25-08-28 16:05:45 | D | - best error = [ 5221.9546, 5195.5797, 5176.9721, 5162.5649, 5153.3389, 5145.1770, 5139.1517, 5134.2616, 5130.3076, 5125.5687] +25-08-28 16:06:07 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 16:06:07 | D | - error = [ 5121.8561, 5119.6680, 5117.1355, 5114.5361, 5112.3603, 5110.3139, 5108.7848, 5107.0369, 5105.9640, 5104.2578] +25-08-28 16:06:07 | D | - best error = [ 5121.8561, 5119.6680, 5117.1355, 5114.5361, 5112.3603, 5110.3139, 5108.7848, 5107.0369, 5105.9640, 5104.2578] +25-08-28 16:06:09 | D | - iter = [ 20] +25-08-28 16:06:09 | D | - error = [ 5104.3497] +25-08-28 16:06:09 | D | - best error = [ 5104.2578] +25-08-28 16:06:09 | D | + Adding low-rank branches to single_transformer_blocks.27.proj_mlp +25-08-28 16:06:09 | D | - Calibrating low-rank branch for single_transformer_blocks.27.proj_out.linears.1.linear +25-08-28 16:06:09 | D | + w: sint4 +25-08-28 16:06:09 | D | + x: sint4 +25-08-28 16:06:09 | D | + y: None +25-08-28 16:06:09 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:06:09 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-28 16:06:09 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-28 16:06:14 | D | + finished calculating the original outputs, ram usage: 12.7 +25-08-28 16:06:31 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-28 16:06:31 | D | - error = [10507.6920, 10472.1940, 10458.3398, 10410.1869, 10410.6603] +25-08-28 16:06:31 | D | - best error = [10507.6920, 10472.1940, 10458.3398, 10410.1869, 10410.1869] +25-08-28 16:06:32 | D | + Adding low-rank branches to single_transformer_blocks.27.proj_out.linears.1.linear +25-08-28 16:06:50 | D | - Calibrating low-rank branches of block single_transformer_blocks.28 +25-08-28 16:06:50 | D | - Calibrating low-rank branch for single_transformer_blocks.28.attn.to_q, single_transformer_blocks.28.attn.to_k, single_transformer_blocks.28.attn.to_v +25-08-28 16:06:50 | D | + w: sint4 +25-08-28 16:06:50 | D | + x: sint4 +25-08-28 16:06:50 | D | + y: None +25-08-28 16:06:50 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:06:50 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-28 16:06:50 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-28 16:06:51 | D | + finished calculating the original outputs, ram usage: 12.7 +25-08-28 16:07:07 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 16:07:07 | D | - error = [61098.1280, 59015.1276, 57812.8822, 57551.9644, 57194.7791, 56827.1056, 56845.4849] +25-08-28 16:07:07 | D | - best error = [61098.1280, 59015.1276, 57812.8822, 57551.9644, 57194.7791, 56827.1056, 56827.1056] +25-08-28 16:07:07 | D | + Adding low-rank branches to single_transformer_blocks.28.attn.to_q, single_transformer_blocks.28.attn.to_k, single_transformer_blocks.28.attn.to_v +25-08-28 16:07:07 | D | - Calibrating low-rank branch for single_transformer_blocks.28.proj_out.linears.0 +25-08-28 16:07:07 | D | + w: sint4 +25-08-28 16:07:07 | D | + x: sint4 +25-08-28 16:07:07 | D | + y: None +25-08-28 16:07:07 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:07:07 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-28 16:07:07 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-28 16:07:08 | D | + finished calculating the original outputs, ram usage: 12.7 +25-08-28 16:07:23 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:07:23 | D | - error = [ 3497.8180, 3466.6628, 3440.3250, 3423.8826, 3411.5258, 3398.6989, 3385.8833, 3380.7294, 3375.0015, 3371.3328] +25-08-28 16:07:23 | D | - best error = [ 3497.8180, 3466.6628, 3440.3250, 3423.8826, 3411.5258, 3398.6989, 3385.8833, 3380.7294, 3375.0015, 3371.3328] +25-08-28 16:07:38 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 16:07:38 | D | - error = [ 3367.1549, 3364.8168, 3361.4445, 3358.4720, 3356.2342, 3354.1677, 3351.5480, 3351.0634, 3348.6959, 3349.1814] +25-08-28 16:07:38 | D | - best error = [ 3367.1549, 3364.8168, 3361.4445, 3358.4720, 3356.2342, 3354.1677, 3351.5480, 3351.0634, 3348.6959, 3348.6959] +25-08-28 16:07:38 | D | + Adding low-rank branches to single_transformer_blocks.28.proj_out.linears.0 +25-08-28 16:07:38 | D | - Calibrating low-rank branch for single_transformer_blocks.28.proj_mlp +25-08-28 16:07:38 | D | + w: sint4 +25-08-28 16:07:38 | D | + x: sint4 +25-08-28 16:07:38 | D | + y: None +25-08-28 16:07:38 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:07:38 | D | + finished parsing calibration arguments, ram usage: 12.7 +25-08-28 16:07:38 | D | + finished resetting calibrator, ram usage: 12.7 +25-08-28 16:07:40 | D | + finished calculating the original outputs, ram usage: 12.7 +25-08-28 16:08:01 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:08:01 | D | - error = [ 5588.8705, 5552.7849, 5530.4503, 5514.4075, 5501.8226, 5491.5151, 5484.0534, 5479.0008, 5474.0913, 5471.0315] +25-08-28 16:08:01 | D | - best error = [ 5588.8705, 5552.7849, 5530.4503, 5514.4075, 5501.8226, 5491.5151, 5484.0534, 5479.0008, 5474.0913, 5471.0315] +25-08-28 16:08:23 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 16:08:23 | D | - error = [ 5466.8005, 5464.4714, 5462.0602, 5459.8414, 5457.7536, 5455.5057, 5453.5161, 5451.6757, 5449.7635, 5447.7070] +25-08-28 16:08:23 | D | - best error = [ 5466.8005, 5464.4714, 5462.0602, 5459.8414, 5457.7536, 5455.5057, 5453.5161, 5451.6757, 5449.7635, 5447.7070] +25-08-28 16:08:38 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26] +25-08-28 16:08:38 | D | - error = [ 5447.2009, 5446.1317, 5445.2910, 5443.9842, 5443.2033, 5442.6828, 5442.7680] +25-08-28 16:08:38 | D | - best error = [ 5447.2009, 5446.1317, 5445.2910, 5443.9842, 5443.2033, 5442.6828, 5442.6828] +25-08-28 16:08:38 | D | + Adding low-rank branches to single_transformer_blocks.28.proj_mlp +25-08-28 16:08:38 | D | - Calibrating low-rank branch for single_transformer_blocks.28.proj_out.linears.1.linear +25-08-28 16:08:38 | D | + w: sint4 +25-08-28 16:08:38 | D | + x: sint4 +25-08-28 16:08:38 | D | + y: None +25-08-28 16:08:38 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:08:38 | D | + finished parsing calibration arguments, ram usage: 12.8 +25-08-28 16:08:39 | D | + finished resetting calibrator, ram usage: 12.8 +25-08-28 16:08:44 | D | + finished calculating the original outputs, ram usage: 12.9 +25-08-28 16:09:13 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-28 16:09:13 | D | - error = [ 9333.7452, 9328.1617, 9296.1338, 9263.9292, 9255.8643, 9247.5484, 9239.1039, 9226.5252, 9233.6342] +25-08-28 16:09:13 | D | - best error = [ 9333.7452, 9328.1617, 9296.1338, 9263.9292, 9255.8643, 9247.5484, 9239.1039, 9226.5252, 9226.5252] +25-08-28 16:09:13 | D | + Adding low-rank branches to single_transformer_blocks.28.proj_out.linears.1.linear +25-08-28 16:09:32 | D | - Calibrating low-rank branches of block single_transformer_blocks.29 +25-08-28 16:09:32 | D | - Calibrating low-rank branch for single_transformer_blocks.29.attn.to_q, single_transformer_blocks.29.attn.to_k, single_transformer_blocks.29.attn.to_v +25-08-28 16:09:32 | D | + w: sint4 +25-08-28 16:09:32 | D | + x: sint4 +25-08-28 16:09:32 | D | + y: None +25-08-28 16:09:32 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:09:32 | D | + finished parsing calibration arguments, ram usage: 12.8 +25-08-28 16:09:32 | D | + finished resetting calibrator, ram usage: 12.8 +25-08-28 16:09:33 | D | + finished calculating the original outputs, ram usage: 12.8 +25-08-28 16:09:48 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 16:09:48 | D | - error = [49422.9407, 48157.6637, 47372.5578, 47059.9278, 46893.4943, 46506.3333, 46518.2741] +25-08-28 16:09:48 | D | - best error = [49422.9407, 48157.6637, 47372.5578, 47059.9278, 46893.4943, 46506.3333, 46506.3333] +25-08-28 16:09:49 | D | + Adding low-rank branches to single_transformer_blocks.29.attn.to_q, single_transformer_blocks.29.attn.to_k, single_transformer_blocks.29.attn.to_v +25-08-28 16:09:49 | D | - Calibrating low-rank branch for single_transformer_blocks.29.proj_out.linears.0 +25-08-28 16:09:49 | D | + w: sint4 +25-08-28 16:09:49 | D | + x: sint4 +25-08-28 16:09:49 | D | + y: None +25-08-28 16:09:49 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:09:49 | D | + finished parsing calibration arguments, ram usage: 12.8 +25-08-28 16:09:49 | D | + finished resetting calibrator, ram usage: 12.8 +25-08-28 16:09:50 | D | + finished calculating the original outputs, ram usage: 12.8 +25-08-28 16:10:05 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:10:05 | D | - error = [ 3284.1603, 3234.5389, 3205.9332, 3189.1753, 3177.5566, 3165.9400, 3154.8147, 3147.7316, 3147.3158, 3142.0729] +25-08-28 16:10:05 | D | - best error = [ 3284.1603, 3234.5389, 3205.9332, 3189.1753, 3177.5566, 3165.9400, 3154.8147, 3147.7316, 3147.3158, 3142.0729] +25-08-28 16:10:08 | D | - iter = [ 10, 11] +25-08-28 16:10:08 | D | - error = [ 3140.0242, 3142.2629] +25-08-28 16:10:08 | D | - best error = [ 3140.0242, 3140.0242] +25-08-28 16:10:08 | D | + Adding low-rank branches to single_transformer_blocks.29.proj_out.linears.0 +25-08-28 16:10:08 | D | - Calibrating low-rank branch for single_transformer_blocks.29.proj_mlp +25-08-28 16:10:08 | D | + w: sint4 +25-08-28 16:10:08 | D | + x: sint4 +25-08-28 16:10:08 | D | + y: None +25-08-28 16:10:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:10:08 | D | + finished parsing calibration arguments, ram usage: 12.8 +25-08-28 16:10:08 | D | + finished resetting calibrator, ram usage: 12.8 +25-08-28 16:10:10 | D | + finished calculating the original outputs, ram usage: 12.8 +25-08-28 16:10:32 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:10:32 | D | - error = [ 5473.1117, 5446.6959, 5429.1160, 5417.0579, 5407.9676, 5400.6303, 5395.3026, 5391.2340, 5386.4716, 5382.0108] +25-08-28 16:10:32 | D | - best error = [ 5473.1117, 5446.6959, 5429.1160, 5417.0579, 5407.9676, 5400.6303, 5395.3026, 5391.2340, 5386.4716, 5382.0108] +25-08-28 16:10:56 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 16:10:56 | D | - error = [ 5378.5840, 5376.4286, 5374.8721, 5371.7956, 5368.3649, 5366.9837, 5365.1738, 5362.7387, 5361.4476, 5360.2922] +25-08-28 16:10:56 | D | - best error = [ 5378.5840, 5376.4286, 5374.8721, 5371.7956, 5368.3649, 5366.9837, 5365.1738, 5362.7387, 5361.4476, 5360.2922] +25-08-28 16:11:13 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27] +25-08-28 16:11:13 | D | - error = [ 5358.6851, 5357.9593, 5357.1588, 5355.1691, 5354.8918, 5353.7488, 5352.4365, 5352.6194] +25-08-28 16:11:13 | D | - best error = [ 5358.6851, 5357.9593, 5357.1588, 5355.1691, 5354.8918, 5353.7488, 5352.4365, 5352.4365] +25-08-28 16:11:13 | D | + Adding low-rank branches to single_transformer_blocks.29.proj_mlp +25-08-28 16:11:14 | D | - Calibrating low-rank branch for single_transformer_blocks.29.proj_out.linears.1.linear +25-08-28 16:11:14 | D | + w: sint4 +25-08-28 16:11:14 | D | + x: sint4 +25-08-28 16:11:14 | D | + y: None +25-08-28 16:11:14 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:11:14 | D | + finished parsing calibration arguments, ram usage: 13.3 +25-08-28 16:11:14 | D | + finished resetting calibrator, ram usage: 13.3 +25-08-28 16:11:19 | D | + finished calculating the original outputs, ram usage: 13.3 +25-08-28 16:11:41 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 16:11:41 | D | - error = [ 9156.8462, 9127.5666, 9125.8218, 9097.2805, 9086.1618, 9075.0537, 9079.0629] +25-08-28 16:11:41 | D | - best error = [ 9156.8462, 9127.5666, 9125.8218, 9097.2805, 9086.1618, 9075.0537, 9075.0537] +25-08-28 16:11:42 | D | + Adding low-rank branches to single_transformer_blocks.29.proj_out.linears.1.linear +25-08-28 16:12:00 | D | - Calibrating low-rank branches of block single_transformer_blocks.30 +25-08-28 16:12:00 | D | - Calibrating low-rank branch for single_transformer_blocks.30.attn.to_q, single_transformer_blocks.30.attn.to_k, single_transformer_blocks.30.attn.to_v +25-08-28 16:12:00 | D | + w: sint4 +25-08-28 16:12:00 | D | + x: sint4 +25-08-28 16:12:00 | D | + y: None +25-08-28 16:12:00 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:12:00 | D | + finished parsing calibration arguments, ram usage: 13.5 +25-08-28 16:12:00 | D | + finished resetting calibrator, ram usage: 13.5 +25-08-28 16:12:01 | D | + finished calculating the original outputs, ram usage: 13.5 +25-08-28 16:12:12 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-28 16:12:12 | D | - error = [75002.9077, 73608.0343, 72989.8325, 72461.0165, 72713.0170] +25-08-28 16:12:12 | D | - best error = [75002.9077, 73608.0343, 72989.8325, 72461.0165, 72461.0165] +25-08-28 16:12:12 | D | + Adding low-rank branches to single_transformer_blocks.30.attn.to_q, single_transformer_blocks.30.attn.to_k, single_transformer_blocks.30.attn.to_v +25-08-28 16:12:12 | D | - Calibrating low-rank branch for single_transformer_blocks.30.proj_out.linears.0 +25-08-28 16:12:12 | D | + w: sint4 +25-08-28 16:12:12 | D | + x: sint4 +25-08-28 16:12:12 | D | + y: None +25-08-28 16:12:12 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:12:12 | D | + finished parsing calibration arguments, ram usage: 13.5 +25-08-28 16:12:12 | D | + finished resetting calibrator, ram usage: 13.5 +25-08-28 16:12:13 | D | + finished calculating the original outputs, ram usage: 13.5 +25-08-28 16:12:29 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:12:29 | D | - error = [ 3282.2791, 3236.4196, 3215.1774, 3207.1127, 3191.6858, 3183.9799, 3174.5302, 3170.0233, 3164.5426, 3158.7881] +25-08-28 16:12:29 | D | - best error = [ 3282.2791, 3236.4196, 3215.1774, 3207.1127, 3191.6858, 3183.9799, 3174.5302, 3170.0233, 3164.5426, 3158.7881] +25-08-28 16:12:39 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16] +25-08-28 16:12:39 | D | - error = [ 3153.6975, 3153.5710, 3151.2926, 3151.1428, 3149.3488, 3147.7957, 3149.9852] +25-08-28 16:12:39 | D | - best error = [ 3153.6975, 3153.5710, 3151.2926, 3151.1428, 3149.3488, 3147.7957, 3147.7957] +25-08-28 16:12:39 | D | + Adding low-rank branches to single_transformer_blocks.30.proj_out.linears.0 +25-08-28 16:12:39 | D | - Calibrating low-rank branch for single_transformer_blocks.30.proj_mlp +25-08-28 16:12:39 | D | + w: sint4 +25-08-28 16:12:39 | D | + x: sint4 +25-08-28 16:12:39 | D | + y: None +25-08-28 16:12:39 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:12:39 | D | + finished parsing calibration arguments, ram usage: 13.5 +25-08-28 16:12:39 | D | + finished resetting calibrator, ram usage: 13.5 +25-08-28 16:12:41 | D | + finished calculating the original outputs, ram usage: 13.5 +25-08-28 16:13:02 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:13:02 | D | - error = [ 5417.5347, 5392.1348, 5375.6717, 5363.3383, 5354.8920, 5348.3093, 5342.6158, 5336.5138, 5332.2929, 5330.0056] +25-08-28 16:13:02 | D | - best error = [ 5417.5347, 5392.1348, 5375.6717, 5363.3383, 5354.8920, 5348.3093, 5342.6158, 5336.5138, 5332.2929, 5330.0056] +25-08-28 16:13:23 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 16:13:23 | D | - error = [ 5327.1884, 5324.8482, 5323.3245, 5321.8641, 5319.8147, 5318.7465, 5317.2640, 5315.9334, 5314.4932, 5313.4524] +25-08-28 16:13:23 | D | - best error = [ 5327.1884, 5324.8482, 5323.3245, 5321.8641, 5319.8147, 5318.7465, 5317.2640, 5315.9334, 5314.4932, 5313.4524] +25-08-28 16:13:45 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 16:13:45 | D | - error = [ 5312.2827, 5311.1903, 5310.1812, 5309.4553, 5308.6471, 5307.5772, 5306.6434, 5305.6741, 5304.8127, 5304.5946] +25-08-28 16:13:45 | D | - best error = [ 5312.2827, 5311.1903, 5310.1812, 5309.4553, 5308.6471, 5307.5772, 5306.6434, 5305.6741, 5304.8127, 5304.5946] +25-08-28 16:13:56 | D | - iter = [ 30, 31, 32, 33, 34] +25-08-28 16:13:56 | D | - error = [ 5304.1018, 5303.5650, 5303.0316, 5302.7608, 5303.1549] +25-08-28 16:13:56 | D | - best error = [ 5304.1018, 5303.5650, 5303.0316, 5302.7608, 5302.7608] +25-08-28 16:13:56 | D | + Adding low-rank branches to single_transformer_blocks.30.proj_mlp +25-08-28 16:13:56 | D | - Calibrating low-rank branch for single_transformer_blocks.30.proj_out.linears.1.linear +25-08-28 16:13:56 | D | + w: sint4 +25-08-28 16:13:56 | D | + x: sint4 +25-08-28 16:13:56 | D | + y: None +25-08-28 16:13:56 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:13:56 | D | + finished parsing calibration arguments, ram usage: 13.5 +25-08-28 16:13:56 | D | + finished resetting calibrator, ram usage: 13.5 +25-08-28 16:14:01 | D | + finished calculating the original outputs, ram usage: 13.5 +25-08-28 16:14:18 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-28 16:14:18 | D | - error = [ 2801.5853, 2794.8585, 2792.7295, 2786.3022, 2793.6665] +25-08-28 16:14:18 | D | - best error = [ 2801.5853, 2794.8585, 2792.7295, 2786.3022, 2786.3022] +25-08-28 16:14:18 | D | + Adding low-rank branches to single_transformer_blocks.30.proj_out.linears.1.linear +25-08-28 16:14:36 | D | - Calibrating low-rank branches of block single_transformer_blocks.31 +25-08-28 16:14:36 | D | - Calibrating low-rank branch for single_transformer_blocks.31.attn.to_q, single_transformer_blocks.31.attn.to_k, single_transformer_blocks.31.attn.to_v +25-08-28 16:14:36 | D | + w: sint4 +25-08-28 16:14:36 | D | + x: sint4 +25-08-28 16:14:36 | D | + y: None +25-08-28 16:14:36 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:14:36 | D | + finished parsing calibration arguments, ram usage: 13.6 +25-08-28 16:14:36 | D | + finished resetting calibrator, ram usage: 13.6 +25-08-28 16:14:38 | D | + finished calculating the original outputs, ram usage: 13.6 +25-08-28 16:14:55 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-28 16:14:55 | D | - error = [75229.0769, 73288.2022, 72213.0714, 71479.2546, 71330.5231, 71018.6599, 70955.0663, 71031.4878] +25-08-28 16:14:55 | D | - best error = [75229.0769, 73288.2022, 72213.0714, 71479.2546, 71330.5231, 71018.6599, 70955.0663, 70955.0663] +25-08-28 16:14:55 | D | + Adding low-rank branches to single_transformer_blocks.31.attn.to_q, single_transformer_blocks.31.attn.to_k, single_transformer_blocks.31.attn.to_v +25-08-28 16:14:56 | D | - Calibrating low-rank branch for single_transformer_blocks.31.proj_out.linears.0 +25-08-28 16:14:56 | D | + w: sint4 +25-08-28 16:14:56 | D | + x: sint4 +25-08-28 16:14:56 | D | + y: None +25-08-28 16:14:56 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:14:56 | D | + finished parsing calibration arguments, ram usage: 13.5 +25-08-28 16:14:56 | D | + finished resetting calibrator, ram usage: 13.5 +25-08-28 16:14:57 | D | + finished calculating the original outputs, ram usage: 13.6 +25-08-28 16:15:12 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:15:12 | D | - error = [ 3622.7811, 3573.5554, 3539.6255, 3523.3622, 3510.3261, 3500.2053, 3487.9986, 3479.5527, 3476.7919, 3472.5073] +25-08-28 16:15:12 | D | - best error = [ 3622.7811, 3573.5554, 3539.6255, 3523.3622, 3510.3261, 3500.2053, 3487.9986, 3479.5527, 3476.7919, 3472.5073] +25-08-28 16:15:15 | D | - iter = [ 10, 11] +25-08-28 16:15:15 | D | - error = [ 3468.9361, 3471.7732] +25-08-28 16:15:15 | D | - best error = [ 3468.9361, 3468.9361] +25-08-28 16:15:15 | D | + Adding low-rank branches to single_transformer_blocks.31.proj_out.linears.0 +25-08-28 16:15:15 | D | - Calibrating low-rank branch for single_transformer_blocks.31.proj_mlp +25-08-28 16:15:15 | D | + w: sint4 +25-08-28 16:15:15 | D | + x: sint4 +25-08-28 16:15:15 | D | + y: None +25-08-28 16:15:15 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:15:15 | D | + finished parsing calibration arguments, ram usage: 13.6 +25-08-28 16:15:15 | D | + finished resetting calibrator, ram usage: 13.6 +25-08-28 16:15:17 | D | + finished calculating the original outputs, ram usage: 13.6 +25-08-28 16:15:38 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:15:38 | D | - error = [ 5356.9196, 5326.7082, 5308.6222, 5296.7065, 5288.5820, 5281.6865, 5275.0094, 5270.7861, 5267.1248, 5263.3503] +25-08-28 16:15:38 | D | - best error = [ 5356.9196, 5326.7082, 5308.6222, 5296.7065, 5288.5820, 5281.6865, 5275.0094, 5270.7861, 5267.1248, 5263.3503] +25-08-28 16:16:00 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 16:16:00 | D | - error = [ 5258.4283, 5255.9406, 5254.0112, 5251.6316, 5250.0023, 5247.5390, 5245.2994, 5243.6369, 5242.4078, 5241.2989] +25-08-28 16:16:00 | D | - best error = [ 5258.4283, 5255.9406, 5254.0112, 5251.6316, 5250.0023, 5247.5390, 5245.2994, 5243.6369, 5242.4078, 5241.2989] +25-08-28 16:16:22 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 16:16:22 | D | - error = [ 5240.9358, 5239.9210, 5238.9621, 5237.8005, 5236.5550, 5235.7893, 5235.3649, 5234.9672, 5234.0581, 5233.0377] +25-08-28 16:16:22 | D | - best error = [ 5240.9358, 5239.9210, 5238.9621, 5237.8005, 5236.5550, 5235.7893, 5235.3649, 5234.9672, 5234.0581, 5233.0377] +25-08-28 16:16:32 | D | - iter = [ 30, 31, 32, 33, 34] +25-08-28 16:16:32 | D | - error = [ 5232.4021, 5231.4948, 5230.3822, 5229.4887, 5229.6086] +25-08-28 16:16:32 | D | - best error = [ 5232.4021, 5231.4948, 5230.3822, 5229.4887, 5229.4887] +25-08-28 16:16:33 | D | + Adding low-rank branches to single_transformer_blocks.31.proj_mlp +25-08-28 16:16:33 | D | - Calibrating low-rank branch for single_transformer_blocks.31.proj_out.linears.1.linear +25-08-28 16:16:33 | D | + w: sint4 +25-08-28 16:16:33 | D | + x: sint4 +25-08-28 16:16:33 | D | + y: None +25-08-28 16:16:33 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:16:33 | D | + finished parsing calibration arguments, ram usage: 13.6 +25-08-28 16:16:33 | D | + finished resetting calibrator, ram usage: 13.6 +25-08-28 16:16:38 | D | + finished calculating the original outputs, ram usage: 13.6 +25-08-28 16:17:00 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 16:17:00 | D | - error = [ 9051.3486, 9041.8675, 9033.1106, 9030.2518, 9006.8054, 8996.4180, 9003.8738] +25-08-28 16:17:00 | D | - best error = [ 9051.3486, 9041.8675, 9033.1106, 9030.2518, 9006.8054, 8996.4180, 8996.4180] +25-08-28 16:17:01 | D | + Adding low-rank branches to single_transformer_blocks.31.proj_out.linears.1.linear +25-08-28 16:17:19 | D | - Calibrating low-rank branches of block single_transformer_blocks.32 +25-08-28 16:17:19 | D | - Calibrating low-rank branch for single_transformer_blocks.32.attn.to_q, single_transformer_blocks.32.attn.to_k, single_transformer_blocks.32.attn.to_v +25-08-28 16:17:19 | D | + w: sint4 +25-08-28 16:17:19 | D | + x: sint4 +25-08-28 16:17:19 | D | + y: None +25-08-28 16:17:19 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:17:19 | D | + finished parsing calibration arguments, ram usage: 13.6 +25-08-28 16:17:19 | D | + finished resetting calibrator, ram usage: 13.6 +25-08-28 16:17:20 | D | + finished calculating the original outputs, ram usage: 13.6 +25-08-28 16:17:36 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6] +25-08-28 16:17:36 | D | - error = [94261.6695, 92233.2237, 91169.5528, 90424.6508, 90294.6841, 89865.8171, 89897.1758] +25-08-28 16:17:36 | D | - best error = [94261.6695, 92233.2237, 91169.5528, 90424.6508, 90294.6841, 89865.8171, 89865.8171] +25-08-28 16:17:36 | D | + Adding low-rank branches to single_transformer_blocks.32.attn.to_q, single_transformer_blocks.32.attn.to_k, single_transformer_blocks.32.attn.to_v +25-08-28 16:17:36 | D | - Calibrating low-rank branch for single_transformer_blocks.32.proj_out.linears.0 +25-08-28 16:17:36 | D | + w: sint4 +25-08-28 16:17:36 | D | + x: sint4 +25-08-28 16:17:36 | D | + y: None +25-08-28 16:17:36 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:17:36 | D | + finished parsing calibration arguments, ram usage: 13.5 +25-08-28 16:17:36 | D | + finished resetting calibrator, ram usage: 13.5 +25-08-28 16:17:38 | D | + finished calculating the original outputs, ram usage: 13.5 +25-08-28 16:17:52 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:17:52 | D | - error = [ 4195.8746, 4129.9637, 4091.2064, 4067.3386, 4042.2818, 4037.2746, 4021.8851, 4018.4237, 4009.2950, 4001.1262] +25-08-28 16:17:52 | D | - best error = [ 4195.8746, 4129.9637, 4091.2064, 4067.3386, 4042.2818, 4037.2746, 4021.8851, 4018.4237, 4009.2950, 4001.1262] +25-08-28 16:18:08 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 16:18:08 | D | - error = [ 3996.9795, 3993.7644, 3988.1125, 3982.6418, 3976.7755, 3973.8513, 3971.1639, 3968.0513, 3967.6658, 3971.5366] +25-08-28 16:18:08 | D | - best error = [ 3996.9795, 3993.7644, 3988.1125, 3982.6418, 3976.7755, 3973.8513, 3971.1639, 3968.0513, 3967.6658, 3967.6658] +25-08-28 16:18:08 | D | + Adding low-rank branches to single_transformer_blocks.32.proj_out.linears.0 +25-08-28 16:18:08 | D | - Calibrating low-rank branch for single_transformer_blocks.32.proj_mlp +25-08-28 16:18:08 | D | + w: sint4 +25-08-28 16:18:08 | D | + x: sint4 +25-08-28 16:18:08 | D | + y: None +25-08-28 16:18:08 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:18:08 | D | + finished parsing calibration arguments, ram usage: 13.6 +25-08-28 16:18:08 | D | + finished resetting calibrator, ram usage: 13.6 +25-08-28 16:18:10 | D | + finished calculating the original outputs, ram usage: 13.6 +25-08-28 16:18:31 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:18:31 | D | - error = [ 5238.1863, 5216.0780, 5201.4213, 5192.1381, 5185.2383, 5180.1799, 5174.9823, 5171.1509, 5167.3318, 5164.9743] +25-08-28 16:18:31 | D | - best error = [ 5238.1863, 5216.0780, 5201.4213, 5192.1381, 5185.2383, 5180.1799, 5174.9823, 5171.1509, 5167.3318, 5164.9743] +25-08-28 16:18:53 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 16:18:53 | D | - error = [ 5162.0488, 5158.4837, 5157.1649, 5156.0970, 5154.5174, 5151.6564, 5151.0498, 5149.1135, 5147.1835, 5147.2093] +25-08-28 16:18:53 | D | - best error = [ 5162.0488, 5158.4837, 5157.1649, 5156.0970, 5154.5174, 5151.6564, 5151.0498, 5149.1135, 5147.1835, 5147.1835] +25-08-28 16:18:53 | D | + Adding low-rank branches to single_transformer_blocks.32.proj_mlp +25-08-28 16:18:53 | D | - Calibrating low-rank branch for single_transformer_blocks.32.proj_out.linears.1.linear +25-08-28 16:18:53 | D | + w: sint4 +25-08-28 16:18:53 | D | + x: sint4 +25-08-28 16:18:53 | D | + y: None +25-08-28 16:18:53 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:18:53 | D | + finished parsing calibration arguments, ram usage: 13.6 +25-08-28 16:18:53 | D | + finished resetting calibrator, ram usage: 13.6 +25-08-28 16:18:58 | D | + finished calculating the original outputs, ram usage: 13.6 +25-08-28 16:19:18 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-28 16:19:18 | D | - error = [ 3292.6801, 3287.9519, 3284.2829, 3276.2595, 3275.2325, 3275.5058] +25-08-28 16:19:18 | D | - best error = [ 3292.6801, 3287.9519, 3284.2829, 3276.2595, 3275.2325, 3275.2325] +25-08-28 16:19:18 | D | + Adding low-rank branches to single_transformer_blocks.32.proj_out.linears.1.linear +25-08-28 16:19:36 | D | - Calibrating low-rank branches of block single_transformer_blocks.33 +25-08-28 16:19:36 | D | - Calibrating low-rank branch for single_transformer_blocks.33.attn.to_q, single_transformer_blocks.33.attn.to_k, single_transformer_blocks.33.attn.to_v +25-08-28 16:19:36 | D | + w: sint4 +25-08-28 16:19:36 | D | + x: sint4 +25-08-28 16:19:36 | D | + y: None +25-08-28 16:19:36 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:19:36 | D | + finished parsing calibration arguments, ram usage: 13.6 +25-08-28 16:19:36 | D | + finished resetting calibrator, ram usage: 13.6 +25-08-28 16:19:38 | D | + finished calculating the original outputs, ram usage: 13.6 +25-08-28 16:19:59 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:19:59 | D | - error = [90492.3224, 88255.4141, 86535.2393, 86242.7755, 85097.9370, 84741.1358, 84210.3302, 83838.2778, 83732.3431, 83970.8628] +25-08-28 16:19:59 | D | - best error = [90492.3224, 88255.4141, 86535.2393, 86242.7755, 85097.9370, 84741.1358, 84210.3302, 83838.2778, 83732.3431, 83732.3431] +25-08-28 16:20:00 | D | + Adding low-rank branches to single_transformer_blocks.33.attn.to_q, single_transformer_blocks.33.attn.to_k, single_transformer_blocks.33.attn.to_v +25-08-28 16:20:00 | D | - Calibrating low-rank branch for single_transformer_blocks.33.proj_out.linears.0 +25-08-28 16:20:00 | D | + w: sint4 +25-08-28 16:20:00 | D | + x: sint4 +25-08-28 16:20:00 | D | + y: None +25-08-28 16:20:00 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:20:00 | D | + finished parsing calibration arguments, ram usage: 13.6 +25-08-28 16:20:00 | D | + finished resetting calibrator, ram usage: 13.6 +25-08-28 16:20:01 | D | + finished calculating the original outputs, ram usage: 13.6 +25-08-28 16:20:16 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:20:16 | D | - error = [ 2021.6375, 2000.2669, 1989.3111, 1975.3016, 1965.4620, 1959.7743, 1955.3323, 1949.7290, 1945.9794, 1945.6930] +25-08-28 16:20:16 | D | - best error = [ 2021.6375, 2000.2669, 1989.3111, 1975.3016, 1965.4620, 1959.7743, 1955.3323, 1949.7290, 1945.9794, 1945.6930] +25-08-28 16:20:31 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 16:20:31 | D | - error = [ 1943.3277, 1941.8631, 1940.0421, 1936.6212, 1934.4251, 1933.1464, 1932.0012, 1929.3219, 1927.2174, 1925.6011] +25-08-28 16:20:31 | D | - best error = [ 1943.3277, 1941.8631, 1940.0421, 1936.6212, 1934.4251, 1933.1464, 1932.0012, 1929.3219, 1927.2174, 1925.6011] +25-08-28 16:20:34 | D | - iter = [ 20, 21] +25-08-28 16:20:34 | D | - error = [ 1924.5178, 1926.2344] +25-08-28 16:20:34 | D | - best error = [ 1924.5178, 1924.5178] +25-08-28 16:20:34 | D | + Adding low-rank branches to single_transformer_blocks.33.proj_out.linears.0 +25-08-28 16:20:34 | D | - Calibrating low-rank branch for single_transformer_blocks.33.proj_mlp +25-08-28 16:20:34 | D | + w: sint4 +25-08-28 16:20:34 | D | + x: sint4 +25-08-28 16:20:34 | D | + y: None +25-08-28 16:20:34 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:20:34 | D | + finished parsing calibration arguments, ram usage: 13.6 +25-08-28 16:20:34 | D | + finished resetting calibrator, ram usage: 13.6 +25-08-28 16:20:36 | D | + finished calculating the original outputs, ram usage: 13.6 +25-08-28 16:20:58 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:20:58 | D | - error = [ 5269.9454, 5244.6535, 5227.0213, 5214.3567, 5205.8315, 5198.1279, 5191.2842, 5187.4793, 5182.3325, 5178.3596] +25-08-28 16:20:58 | D | - best error = [ 5269.9454, 5244.6535, 5227.0213, 5214.3567, 5205.8315, 5198.1279, 5191.2842, 5187.4793, 5182.3325, 5178.3596] +25-08-28 16:21:20 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 16:21:20 | D | - error = [ 5177.8173, 5174.4971, 5171.6123, 5169.1311, 5167.9034, 5166.6388, 5164.2802, 5162.8914, 5160.8811, 5159.6203] +25-08-28 16:21:20 | D | - best error = [ 5177.8173, 5174.4971, 5171.6123, 5169.1311, 5167.9034, 5166.6388, 5164.2802, 5162.8914, 5160.8811, 5159.6203] +25-08-28 16:21:31 | D | - iter = [ 20, 21, 22, 23, 24] +25-08-28 16:21:31 | D | - error = [ 5158.7444, 5158.4370, 5157.2483, 5155.6703, 5155.7248] +25-08-28 16:21:31 | D | - best error = [ 5158.7444, 5158.4370, 5157.2483, 5155.6703, 5155.6703] +25-08-28 16:21:31 | D | + Adding low-rank branches to single_transformer_blocks.33.proj_mlp +25-08-28 16:21:31 | D | - Calibrating low-rank branch for single_transformer_blocks.33.proj_out.linears.1.linear +25-08-28 16:21:31 | D | + w: sint4 +25-08-28 16:21:31 | D | + x: sint4 +25-08-28 16:21:31 | D | + y: None +25-08-28 16:21:31 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:21:31 | D | + finished parsing calibration arguments, ram usage: 13.6 +25-08-28 16:21:31 | D | + finished resetting calibrator, ram usage: 13.6 +25-08-28 16:21:36 | D | + finished calculating the original outputs, ram usage: 13.6 +25-08-28 16:21:47 | D | - iter = [ 0, 1, 2] +25-08-28 16:21:47 | D | - error = [10144.2673, 10128.1694, 10138.7037] +25-08-28 16:21:47 | D | - best error = [10144.2673, 10128.1694, 10128.1694] +25-08-28 16:21:47 | D | + Adding low-rank branches to single_transformer_blocks.33.proj_out.linears.1.linear +25-08-28 16:22:06 | D | - Calibrating low-rank branches of block single_transformer_blocks.34 +25-08-28 16:22:06 | D | - Calibrating low-rank branch for single_transformer_blocks.34.attn.to_q, single_transformer_blocks.34.attn.to_k, single_transformer_blocks.34.attn.to_v +25-08-28 16:22:06 | D | + w: sint4 +25-08-28 16:22:06 | D | + x: sint4 +25-08-28 16:22:06 | D | + y: None +25-08-28 16:22:06 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:22:06 | D | + finished parsing calibration arguments, ram usage: 13.6 +25-08-28 16:22:06 | D | + finished resetting calibrator, ram usage: 13.6 +25-08-28 16:22:07 | D | + finished calculating the original outputs, ram usage: 13.6 +25-08-28 16:22:25 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7] +25-08-28 16:22:25 | D | - error = [117485.5628, 115142.1899, 114168.6590, 112661.4434, 111934.6049, 111783.5577, 111229.8182, 111778.1391] +25-08-28 16:22:25 | D | - best error = [117485.5628, 115142.1899, 114168.6590, 112661.4434, 111934.6049, 111783.5577, 111229.8182, 111229.8182] +25-08-28 16:22:25 | D | + Adding low-rank branches to single_transformer_blocks.34.attn.to_q, single_transformer_blocks.34.attn.to_k, single_transformer_blocks.34.attn.to_v +25-08-28 16:22:25 | D | - Calibrating low-rank branch for single_transformer_blocks.34.proj_out.linears.0 +25-08-28 16:22:25 | D | + w: sint4 +25-08-28 16:22:25 | D | + x: sint4 +25-08-28 16:22:25 | D | + y: None +25-08-28 16:22:25 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:22:25 | D | + finished parsing calibration arguments, ram usage: 13.6 +25-08-28 16:22:25 | D | + finished resetting calibrator, ram usage: 13.6 +25-08-28 16:22:26 | D | + finished calculating the original outputs, ram usage: 13.6 +25-08-28 16:22:41 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:22:41 | D | - error = [ 2829.3754, 2746.0237, 2685.5232, 2661.5279, 2638.0532, 2617.7536, 2600.0212, 2585.5309, 2572.1310, 2566.1198] +25-08-28 16:22:41 | D | - best error = [ 2829.3754, 2746.0237, 2685.5232, 2661.5279, 2638.0532, 2617.7536, 2600.0212, 2585.5309, 2572.1310, 2566.1198] +25-08-28 16:22:56 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 16:22:56 | D | - error = [ 2559.9459, 2557.1090, 2551.0276, 2546.6326, 2542.0287, 2541.1634, 2536.7389, 2530.3346, 2528.7157, 2526.8162] +25-08-28 16:22:56 | D | - best error = [ 2559.9459, 2557.1090, 2551.0276, 2546.6326, 2542.0287, 2541.1634, 2536.7389, 2530.3346, 2528.7157, 2526.8162] +25-08-28 16:22:58 | D | - iter = [ 20] +25-08-28 16:22:58 | D | - error = [ 2529.1574] +25-08-28 16:22:58 | D | - best error = [ 2526.8162] +25-08-28 16:22:58 | D | + Adding low-rank branches to single_transformer_blocks.34.proj_out.linears.0 +25-08-28 16:22:58 | D | - Calibrating low-rank branch for single_transformer_blocks.34.proj_mlp +25-08-28 16:22:58 | D | + w: sint4 +25-08-28 16:22:58 | D | + x: sint4 +25-08-28 16:22:58 | D | + y: None +25-08-28 16:22:58 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:22:58 | D | + finished parsing calibration arguments, ram usage: 13.6 +25-08-28 16:22:58 | D | + finished resetting calibrator, ram usage: 13.6 +25-08-28 16:23:00 | D | + finished calculating the original outputs, ram usage: 13.6 +25-08-28 16:23:22 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:23:22 | D | - error = [ 5397.8238, 5369.9330, 5352.1147, 5341.3272, 5331.5557, 5326.5790, 5321.7398, 5318.4505, 5313.8447, 5308.5056] +25-08-28 16:23:22 | D | - best error = [ 5397.8238, 5369.9330, 5352.1147, 5341.3272, 5331.5557, 5326.5790, 5321.7398, 5318.4505, 5313.8447, 5308.5056] +25-08-28 16:23:44 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 16:23:44 | D | - error = [ 5306.3400, 5304.5982, 5301.9883, 5298.7819, 5297.0850, 5295.0543, 5292.5875, 5289.8646, 5288.5631, 5287.2369] +25-08-28 16:23:44 | D | - best error = [ 5306.3400, 5304.5982, 5301.9883, 5298.7819, 5297.0850, 5295.0543, 5292.5875, 5289.8646, 5288.5631, 5287.2369] +25-08-28 16:24:06 | D | - iter = [ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] +25-08-28 16:24:06 | D | - error = [ 5285.6965, 5284.7882, 5284.1904, 5283.8965, 5282.2773, 5281.5197, 5280.1011, 5279.7015, 5278.8332, 5277.3001] +25-08-28 16:24:06 | D | - best error = [ 5285.6965, 5284.7882, 5284.1904, 5283.8965, 5282.2773, 5281.5197, 5280.1011, 5279.7015, 5278.8332, 5277.3001] +25-08-28 16:24:10 | D | - iter = [ 30, 31] +25-08-28 16:24:10 | D | - error = [ 5276.6091, 5277.0609] +25-08-28 16:24:10 | D | - best error = [ 5276.6091, 5276.6091] +25-08-28 16:24:10 | D | + Adding low-rank branches to single_transformer_blocks.34.proj_mlp +25-08-28 16:24:10 | D | - Calibrating low-rank branch for single_transformer_blocks.34.proj_out.linears.1.linear +25-08-28 16:24:10 | D | + w: sint4 +25-08-28 16:24:10 | D | + x: sint4 +25-08-28 16:24:10 | D | + y: None +25-08-28 16:24:10 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:24:10 | D | + finished parsing calibration arguments, ram usage: 13.6 +25-08-28 16:24:10 | D | + finished resetting calibrator, ram usage: 13.6 +25-08-28 16:24:15 | D | + finished calculating the original outputs, ram usage: 13.6 +25-08-28 16:24:47 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:24:47 | D | - error = [ 5874.0950, 5858.1072, 5857.3448, 5842.4697, 5837.1037, 5834.0634, 5818.0248, 5807.6973, 5804.8001, 5797.5081] +25-08-28 16:24:47 | D | - best error = [ 5874.0950, 5858.1072, 5857.3448, 5842.4697, 5837.1037, 5834.0634, 5818.0248, 5807.6973, 5804.8001, 5797.5081] +25-08-28 16:24:51 | D | - iter = [ 10] +25-08-28 16:24:51 | D | - error = [ 5815.4068] +25-08-28 16:24:51 | D | - best error = [ 5797.5081] +25-08-28 16:24:51 | D | + Adding low-rank branches to single_transformer_blocks.34.proj_out.linears.1.linear +25-08-28 16:25:09 | D | - Calibrating low-rank branches of block single_transformer_blocks.35 +25-08-28 16:25:09 | D | - Calibrating low-rank branch for single_transformer_blocks.35.attn.to_q, single_transformer_blocks.35.attn.to_k, single_transformer_blocks.35.attn.to_v +25-08-28 16:25:09 | D | + w: sint4 +25-08-28 16:25:09 | D | + x: sint4 +25-08-28 16:25:09 | D | + y: None +25-08-28 16:25:09 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:25:09 | D | + finished parsing calibration arguments, ram usage: 13.6 +25-08-28 16:25:09 | D | + finished resetting calibrator, ram usage: 13.6 +25-08-28 16:25:10 | D | + finished calculating the original outputs, ram usage: 13.6 +25-08-28 16:25:17 | D | - iter = [ 0, 1, 2] +25-08-28 16:25:17 | D | - error = [122737.9387, 121329.2143, 121463.2452] +25-08-28 16:25:17 | D | - best error = [122737.9387, 121329.2143, 121329.2143] +25-08-28 16:25:17 | D | + Adding low-rank branches to single_transformer_blocks.35.attn.to_q, single_transformer_blocks.35.attn.to_k, single_transformer_blocks.35.attn.to_v +25-08-28 16:25:17 | D | - Calibrating low-rank branch for single_transformer_blocks.35.proj_out.linears.0 +25-08-28 16:25:17 | D | + w: sint4 +25-08-28 16:25:17 | D | + x: sint4 +25-08-28 16:25:17 | D | + y: None +25-08-28 16:25:17 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:25:17 | D | + finished parsing calibration arguments, ram usage: 13.6 +25-08-28 16:25:17 | D | + finished resetting calibrator, ram usage: 13.6 +25-08-28 16:25:18 | D | + finished calculating the original outputs, ram usage: 13.6 +25-08-28 16:25:33 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:25:33 | D | - error = [ 2654.3219, 2618.2686, 2593.9488, 2580.2820, 2568.3222, 2564.7315, 2554.2089, 2552.9659, 2550.0624, 2546.7825] +25-08-28 16:25:33 | D | - best error = [ 2654.3219, 2618.2686, 2593.9488, 2580.2820, 2568.3222, 2564.7315, 2554.2089, 2552.9659, 2550.0624, 2546.7825] +25-08-28 16:25:37 | D | - iter = [ 10, 11] +25-08-28 16:25:37 | D | - error = [ 2542.0694, 2542.4882] +25-08-28 16:25:37 | D | - best error = [ 2542.0694, 2542.0694] +25-08-28 16:25:37 | D | + Adding low-rank branches to single_transformer_blocks.35.proj_out.linears.0 +25-08-28 16:25:37 | D | - Calibrating low-rank branch for single_transformer_blocks.35.proj_mlp +25-08-28 16:25:37 | D | + w: sint4 +25-08-28 16:25:37 | D | + x: sint4 +25-08-28 16:25:37 | D | + y: None +25-08-28 16:25:37 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:25:37 | D | + finished parsing calibration arguments, ram usage: 13.6 +25-08-28 16:25:37 | D | + finished resetting calibrator, ram usage: 13.6 +25-08-28 16:25:38 | D | + finished calculating the original outputs, ram usage: 13.6 +25-08-28 16:25:59 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:25:59 | D | - error = [ 5240.7971, 5214.6162, 5198.0469, 5186.9613, 5178.9248, 5174.3697, 5169.7124, 5164.2976, 5159.7572, 5157.0643] +25-08-28 16:25:59 | D | - best error = [ 5240.7971, 5214.6162, 5198.0469, 5186.9613, 5178.9248, 5174.3697, 5169.7124, 5164.2976, 5159.7572, 5157.0643] +25-08-28 16:26:20 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 16:26:20 | D | - error = [ 5154.2352, 5152.2191, 5150.6992, 5147.7560, 5143.7202, 5142.2968, 5141.3160, 5138.7210, 5137.4437, 5136.5056] +25-08-28 16:26:20 | D | - best error = [ 5154.2352, 5152.2191, 5150.6992, 5147.7560, 5143.7202, 5142.2968, 5141.3160, 5138.7210, 5137.4437, 5136.5056] +25-08-28 16:26:31 | D | - iter = [ 20, 21, 22, 23, 24] +25-08-28 16:26:31 | D | - error = [ 5136.4490, 5134.1143, 5132.4602, 5131.1975, 5131.2322] +25-08-28 16:26:31 | D | - best error = [ 5136.4490, 5134.1143, 5132.4602, 5131.1975, 5131.1975] +25-08-28 16:26:32 | D | + Adding low-rank branches to single_transformer_blocks.35.proj_mlp +25-08-28 16:26:32 | D | - Calibrating low-rank branch for single_transformer_blocks.35.proj_out.linears.1.linear +25-08-28 16:26:32 | D | + w: sint4 +25-08-28 16:26:32 | D | + x: sint4 +25-08-28 16:26:32 | D | + y: None +25-08-28 16:26:32 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:26:32 | D | + finished parsing calibration arguments, ram usage: 13.6 +25-08-28 16:26:32 | D | + finished resetting calibrator, ram usage: 13.6 +25-08-28 16:26:37 | D | + finished calculating the original outputs, ram usage: 13.6 +25-08-28 16:26:53 | D | - iter = [ 0, 1, 2, 3, 4] +25-08-28 16:26:53 | D | - error = [ 5125.2900, 5099.1958, 5068.0278, 5055.6764, 5056.2069] +25-08-28 16:26:53 | D | - best error = [ 5125.2900, 5099.1958, 5068.0278, 5055.6764, 5055.6764] +25-08-28 16:26:54 | D | + Adding low-rank branches to single_transformer_blocks.35.proj_out.linears.1.linear +25-08-28 16:27:12 | D | - Calibrating low-rank branches of block single_transformer_blocks.36 +25-08-28 16:27:12 | D | - Calibrating low-rank branch for single_transformer_blocks.36.attn.to_q, single_transformer_blocks.36.attn.to_k, single_transformer_blocks.36.attn.to_v +25-08-28 16:27:12 | D | + w: sint4 +25-08-28 16:27:12 | D | + x: sint4 +25-08-28 16:27:12 | D | + y: None +25-08-28 16:27:12 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:27:12 | D | + finished parsing calibration arguments, ram usage: 13.6 +25-08-28 16:27:12 | D | + finished resetting calibrator, ram usage: 13.6 +25-08-28 16:27:13 | D | + finished calculating the original outputs, ram usage: 13.6 +25-08-28 16:27:35 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:27:35 | D | - error = [50802.0434, 50725.9757, 50252.5946, 49713.3054, 49181.8607, 48843.0918, 48562.7202, 48509.0448, 48287.3081, 48256.4350] +25-08-28 16:27:35 | D | - best error = [50802.0434, 50725.9757, 50252.5946, 49713.3054, 49181.8607, 48843.0918, 48562.7202, 48509.0448, 48287.3081, 48256.4350] +25-08-28 16:27:37 | D | - iter = [ 10] +25-08-28 16:27:37 | D | - error = [48569.2301] +25-08-28 16:27:37 | D | - best error = [48256.4350] +25-08-28 16:27:37 | D | + Adding low-rank branches to single_transformer_blocks.36.attn.to_q, single_transformer_blocks.36.attn.to_k, single_transformer_blocks.36.attn.to_v +25-08-28 16:27:37 | D | - Calibrating low-rank branch for single_transformer_blocks.36.proj_out.linears.0 +25-08-28 16:27:37 | D | + w: sint4 +25-08-28 16:27:37 | D | + x: sint4 +25-08-28 16:27:37 | D | + y: None +25-08-28 16:27:37 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:27:37 | D | + finished parsing calibration arguments, ram usage: 13.6 +25-08-28 16:27:37 | D | + finished resetting calibrator, ram usage: 13.6 +25-08-28 16:27:39 | D | + finished calculating the original outputs, ram usage: 13.6 +25-08-28 16:27:54 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:27:54 | D | - error = [ 4529.9084, 4439.7881, 4372.3239, 4341.0439, 4305.5843, 4277.5570, 4273.3490, 4256.9985, 4244.9938, 4236.7724] +25-08-28 16:27:54 | D | - best error = [ 4529.9084, 4439.7881, 4372.3239, 4341.0439, 4305.5843, 4277.5570, 4273.3490, 4256.9985, 4244.9938, 4236.7724] +25-08-28 16:28:01 | D | - iter = [ 10, 11, 12, 13, 14] +25-08-28 16:28:01 | D | - error = [ 4231.0020, 4221.4606, 4215.7467, 4205.9244, 4212.9524] +25-08-28 16:28:01 | D | - best error = [ 4231.0020, 4221.4606, 4215.7467, 4205.9244, 4205.9244] +25-08-28 16:28:01 | D | + Adding low-rank branches to single_transformer_blocks.36.proj_out.linears.0 +25-08-28 16:28:02 | D | - Calibrating low-rank branch for single_transformer_blocks.36.proj_mlp +25-08-28 16:28:02 | D | + w: sint4 +25-08-28 16:28:02 | D | + x: sint4 +25-08-28 16:28:02 | D | + y: None +25-08-28 16:28:02 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:28:02 | D | + finished parsing calibration arguments, ram usage: 13.6 +25-08-28 16:28:02 | D | + finished resetting calibrator, ram usage: 13.6 +25-08-28 16:28:04 | D | + finished calculating the original outputs, ram usage: 13.6 +25-08-28 16:28:25 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:28:25 | D | - error = [ 5551.6722, 5532.2451, 5518.2341, 5509.8281, 5502.9652, 5496.6841, 5489.1258, 5485.4792, 5481.1305, 5478.4430] +25-08-28 16:28:25 | D | - best error = [ 5551.6722, 5532.2451, 5518.2341, 5509.8281, 5502.9652, 5496.6841, 5489.1258, 5485.4792, 5481.1305, 5478.4430] +25-08-28 16:28:46 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] +25-08-28 16:28:46 | D | - error = [ 5476.5732, 5475.6505, 5473.7048, 5472.5283, 5470.7327, 5468.6004, 5468.4343, 5465.5924, 5464.3450, 5462.8751] +25-08-28 16:28:46 | D | - best error = [ 5476.5732, 5475.6505, 5473.7048, 5472.5283, 5470.7327, 5468.6004, 5468.4343, 5465.5924, 5464.3450, 5462.8751] +25-08-28 16:28:53 | D | - iter = [ 20, 21, 22] +25-08-28 16:28:53 | D | - error = [ 5460.8869, 5460.2725, 5460.3190] +25-08-28 16:28:53 | D | - best error = [ 5460.8869, 5460.2725, 5460.2725] +25-08-28 16:28:53 | D | + Adding low-rank branches to single_transformer_blocks.36.proj_mlp +25-08-28 16:28:54 | D | - Calibrating low-rank branch for single_transformer_blocks.36.proj_out.linears.1.linear +25-08-28 16:28:54 | D | + w: sint4 +25-08-28 16:28:54 | D | + x: sint4 +25-08-28 16:28:54 | D | + y: None +25-08-28 16:28:54 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:28:54 | D | + finished parsing calibration arguments, ram usage: 13.6 +25-08-28 16:28:54 | D | + finished resetting calibrator, ram usage: 13.6 +25-08-28 16:28:59 | D | + finished calculating the original outputs, ram usage: 13.6 +25-08-28 16:29:18 | D | - iter = [ 0, 1, 2, 3, 4, 5] +25-08-28 16:29:18 | D | - error = [16665.8856, 16592.8428, 16581.2511, 16546.9920, 16546.0444, 16557.1766] +25-08-28 16:29:18 | D | - best error = [16665.8856, 16592.8428, 16581.2511, 16546.9920, 16546.0444, 16546.0444] +25-08-28 16:29:18 | D | + Adding low-rank branches to single_transformer_blocks.36.proj_out.linears.1.linear +25-08-28 16:29:36 | D | - Calibrating low-rank branches of block single_transformer_blocks.37 +25-08-28 16:29:36 | D | - Calibrating low-rank branch for single_transformer_blocks.37.attn.to_q, single_transformer_blocks.37.attn.to_k, single_transformer_blocks.37.attn.to_v +25-08-28 16:29:36 | D | + w: sint4 +25-08-28 16:29:36 | D | + x: sint4 +25-08-28 16:29:36 | D | + y: None +25-08-28 16:29:36 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:29:36 | D | + finished parsing calibration arguments, ram usage: 13.2 +25-08-28 16:29:36 | D | + finished resetting calibrator, ram usage: 13.2 +25-08-28 16:29:38 | D | + finished calculating the original outputs, ram usage: 13.2 +25-08-28 16:29:44 | D | - iter = [ 0, 1, 2] +25-08-28 16:29:44 | D | - error = [12517.5716, 12413.4276, 12524.6349] +25-08-28 16:29:44 | D | - best error = [12517.5716, 12413.4276, 12413.4276] +25-08-28 16:29:45 | D | + Adding low-rank branches to single_transformer_blocks.37.attn.to_q, single_transformer_blocks.37.attn.to_k, single_transformer_blocks.37.attn.to_v +25-08-28 16:29:45 | D | - Calibrating low-rank branch for single_transformer_blocks.37.proj_out.linears.0 +25-08-28 16:29:45 | D | + w: sint4 +25-08-28 16:29:45 | D | + x: sint4 +25-08-28 16:29:45 | D | + y: None +25-08-28 16:29:45 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:29:45 | D | + finished parsing calibration arguments, ram usage: 13.2 +25-08-28 16:29:45 | D | + finished resetting calibrator, ram usage: 13.2 +25-08-28 16:29:46 | D | + finished calculating the original outputs, ram usage: 13.2 +25-08-28 16:30:01 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:30:01 | D | - error = [ 1373.8318, 1336.7131, 1316.2657, 1297.1435, 1283.6090, 1273.6862, 1265.4206, 1263.8477, 1258.0986, 1252.6071] +25-08-28 16:30:01 | D | - best error = [ 1373.8318, 1336.7131, 1316.2657, 1297.1435, 1283.6090, 1273.6862, 1265.4206, 1263.8477, 1258.0986, 1252.6071] +25-08-28 16:30:11 | D | - iter = [ 10, 11, 12, 13, 14, 15, 16] +25-08-28 16:30:11 | D | - error = [ 1249.2646, 1247.7452, 1243.1250, 1242.0850, 1240.4939, 1238.9223, 1240.1930] +25-08-28 16:30:11 | D | - best error = [ 1249.2646, 1247.7452, 1243.1250, 1242.0850, 1240.4939, 1238.9223, 1238.9223] +25-08-28 16:30:11 | D | + Adding low-rank branches to single_transformer_blocks.37.proj_out.linears.0 +25-08-28 16:30:12 | D | - Calibrating low-rank branch for single_transformer_blocks.37.proj_mlp +25-08-28 16:30:12 | D | + w: sint4 +25-08-28 16:30:12 | D | + x: sint4 +25-08-28 16:30:12 | D | + y: None +25-08-28 16:30:12 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:30:12 | D | + finished parsing calibration arguments, ram usage: 13.2 +25-08-28 16:30:12 | D | + finished resetting calibrator, ram usage: 13.2 +25-08-28 16:30:13 | D | + finished calculating the original outputs, ram usage: 13.2 +25-08-28 16:30:35 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +25-08-28 16:30:35 | D | - error = [ 4940.8817, 4928.4821, 4919.5166, 4914.2755, 4908.4364, 4906.2684, 4903.5754, 4901.9292, 4898.1901, 4895.4123] +25-08-28 16:30:35 | D | - best error = [ 4940.8817, 4928.4821, 4919.5166, 4914.2755, 4908.4364, 4906.2684, 4903.5754, 4901.9292, 4898.1901, 4895.4123] +25-08-28 16:30:39 | D | - iter = [ 10, 11] +25-08-28 16:30:39 | D | - error = [ 4893.5803, 4894.0723] +25-08-28 16:30:39 | D | - best error = [ 4893.5803, 4893.5803] +25-08-28 16:30:39 | D | + Adding low-rank branches to single_transformer_blocks.37.proj_mlp +25-08-28 16:30:39 | D | - Calibrating low-rank branch for single_transformer_blocks.37.proj_out.linears.1.linear +25-08-28 16:30:39 | D | + w: sint4 +25-08-28 16:30:39 | D | + x: sint4 +25-08-28 16:30:39 | D | + y: None +25-08-28 16:30:39 | D | + tensor_type: TensorType.Weights, objective: SearchBasedCalibObjective.OutputsError, granularity: SearchBasedCalibGranularity.Layer +25-08-28 16:30:39 | D | + finished parsing calibration arguments, ram usage: 13.3 +25-08-28 16:30:39 | D | + finished resetting calibrator, ram usage: 13.3 +25-08-28 16:30:44 | D | + finished calculating the original outputs, ram usage: 13.3 +25-08-28 16:31:12 | D | - iter = [ 0, 1, 2, 3, 4, 5, 6, 7, 8] +25-08-28 16:31:12 | D | - error = [ 5226.3756, 5184.3142, 5156.1144, 5139.4595, 5119.4129, 5117.6967, 5112.7734, 5110.2879, 5112.9449] +25-08-28 16:31:12 | D | - best error = [ 5226.3756, 5184.3142, 5156.1144, 5139.4595, 5119.4129, 5117.6967, 5112.7734, 5110.2879, 5110.2879] +25-08-28 16:31:12 | D | + Adding low-rank branches to single_transformer_blocks.37.proj_out.linears.1.linear +25-08-28 16:31:14 | D | - Calibrating weights: block transformer_blocks.0 +25-08-28 16:31:14 | D | - Calibrating transformer_blocks.0.norm1.linear.weight quantizer +25-08-28 16:31:14 | D | - Calibrating transformer_blocks.0.norm1_context.linear.weight quantizer +25-08-28 16:31:14 | D | - Calibrating transformer_blocks.0.attn.to_q.weight quantizer +25-08-28 16:31:14 | D | - Calibrating transformer_blocks.0.attn.to_k.weight quantizer +25-08-28 16:31:14 | D | - Calibrating transformer_blocks.0.attn.to_v.weight quantizer +25-08-28 16:31:14 | D | - Calibrating transformer_blocks.0.attn.add_q_proj.weight quantizer +25-08-28 16:31:15 | D | - Calibrating transformer_blocks.0.attn.add_k_proj.weight quantizer +25-08-28 16:31:15 | D | - Calibrating transformer_blocks.0.attn.add_v_proj.weight quantizer +25-08-28 16:31:15 | D | - Calibrating transformer_blocks.0.attn.to_out.0.weight quantizer +25-08-28 16:31:15 | D | - Calibrating transformer_blocks.0.attn.to_add_out.weight quantizer +25-08-28 16:31:15 | D | - Calibrating transformer_blocks.0.ff.net.0.proj.weight quantizer +25-08-28 16:31:16 | D | - Calibrating transformer_blocks.0.ff.net.2.linear.weight quantizer +25-08-28 16:31:16 | D | - Calibrating transformer_blocks.0.ff_context.net.0.proj.weight quantizer +25-08-28 16:31:16 | D | - Calibrating transformer_blocks.0.ff_context.net.2.linear.weight quantizer +25-08-28 16:31:16 | D | - Calibrating weights: block transformer_blocks.1 +25-08-28 16:31:16 | D | - Calibrating transformer_blocks.1.norm1.linear.weight quantizer +25-08-28 16:31:16 | D | - Calibrating transformer_blocks.1.norm1_context.linear.weight quantizer +25-08-28 16:31:17 | D | - Calibrating transformer_blocks.1.attn.to_q.weight quantizer +25-08-28 16:31:17 | D | - Calibrating transformer_blocks.1.attn.to_k.weight quantizer +25-08-28 16:31:17 | D | - Calibrating transformer_blocks.1.attn.to_v.weight quantizer +25-08-28 16:31:17 | D | - Calibrating transformer_blocks.1.attn.add_q_proj.weight quantizer +25-08-28 16:31:17 | D | - Calibrating transformer_blocks.1.attn.add_k_proj.weight quantizer +25-08-28 16:31:17 | D | - Calibrating transformer_blocks.1.attn.add_v_proj.weight quantizer +25-08-28 16:31:18 | D | - Calibrating transformer_blocks.1.attn.to_out.0.weight quantizer +25-08-28 16:31:18 | D | - Calibrating transformer_blocks.1.attn.to_add_out.weight quantizer +25-08-28 16:31:18 | D | - Calibrating transformer_blocks.1.ff.net.0.proj.weight quantizer +25-08-28 16:31:18 | D | - Calibrating transformer_blocks.1.ff.net.2.linear.weight quantizer +25-08-28 16:31:18 | D | - Calibrating transformer_blocks.1.ff_context.net.0.proj.weight quantizer +25-08-28 16:31:19 | D | - Calibrating transformer_blocks.1.ff_context.net.2.linear.weight quantizer +25-08-28 16:31:19 | D | - Calibrating weights: block transformer_blocks.2 +25-08-28 16:31:19 | D | - Calibrating transformer_blocks.2.norm1.linear.weight quantizer +25-08-28 16:31:19 | D | - Calibrating transformer_blocks.2.norm1_context.linear.weight quantizer +25-08-28 16:31:19 | D | - Calibrating transformer_blocks.2.attn.to_q.weight quantizer +25-08-28 16:31:19 | D | - Calibrating transformer_blocks.2.attn.to_k.weight quantizer +25-08-28 16:31:20 | D | - Calibrating transformer_blocks.2.attn.to_v.weight quantizer +25-08-28 16:31:20 | D | - Calibrating transformer_blocks.2.attn.add_q_proj.weight quantizer +25-08-28 16:31:20 | D | - Calibrating transformer_blocks.2.attn.add_k_proj.weight quantizer +25-08-28 16:31:20 | D | - Calibrating transformer_blocks.2.attn.add_v_proj.weight quantizer +25-08-28 16:31:20 | D | - Calibrating transformer_blocks.2.attn.to_out.0.weight quantizer +25-08-28 16:31:20 | D | - Calibrating transformer_blocks.2.attn.to_add_out.weight quantizer +25-08-28 16:31:21 | D | - Calibrating transformer_blocks.2.ff.net.0.proj.weight quantizer +25-08-28 16:31:21 | D | - Calibrating transformer_blocks.2.ff.net.2.linear.weight quantizer +25-08-28 16:31:21 | D | - Calibrating transformer_blocks.2.ff_context.net.0.proj.weight quantizer +25-08-28 16:31:21 | D | - Calibrating transformer_blocks.2.ff_context.net.2.linear.weight quantizer +25-08-28 16:31:21 | D | - Calibrating weights: block transformer_blocks.3 +25-08-28 16:31:21 | D | - Calibrating transformer_blocks.3.norm1.linear.weight quantizer +25-08-28 16:31:22 | D | - Calibrating transformer_blocks.3.norm1_context.linear.weight quantizer +25-08-28 16:31:22 | D | - Calibrating transformer_blocks.3.attn.to_q.weight quantizer +25-08-28 16:31:22 | D | - Calibrating transformer_blocks.3.attn.to_k.weight quantizer +25-08-28 16:31:22 | D | - Calibrating transformer_blocks.3.attn.to_v.weight quantizer +25-08-28 16:31:22 | D | - Calibrating transformer_blocks.3.attn.add_q_proj.weight quantizer +25-08-28 16:31:23 | D | - Calibrating transformer_blocks.3.attn.add_k_proj.weight quantizer +25-08-28 16:31:23 | D | - Calibrating transformer_blocks.3.attn.add_v_proj.weight quantizer +25-08-28 16:31:23 | D | - Calibrating transformer_blocks.3.attn.to_out.0.weight quantizer +25-08-28 16:31:23 | D | - Calibrating transformer_blocks.3.attn.to_add_out.weight quantizer +25-08-28 16:31:23 | D | - Calibrating transformer_blocks.3.ff.net.0.proj.weight quantizer +25-08-28 16:31:23 | D | - Calibrating transformer_blocks.3.ff.net.2.linear.weight quantizer +25-08-28 16:31:24 | D | - Calibrating transformer_blocks.3.ff_context.net.0.proj.weight quantizer +25-08-28 16:31:24 | D | - Calibrating transformer_blocks.3.ff_context.net.2.linear.weight quantizer +25-08-28 16:31:24 | D | - Calibrating weights: block transformer_blocks.4 +25-08-28 16:31:24 | D | - Calibrating transformer_blocks.4.norm1.linear.weight quantizer +25-08-28 16:31:24 | D | - Calibrating transformer_blocks.4.norm1_context.linear.weight quantizer +25-08-28 16:31:24 | D | - Calibrating transformer_blocks.4.attn.to_q.weight quantizer +25-08-28 16:31:25 | D | - Calibrating transformer_blocks.4.attn.to_k.weight quantizer +25-08-28 16:31:25 | D | - Calibrating transformer_blocks.4.attn.to_v.weight quantizer +25-08-28 16:31:25 | D | - Calibrating transformer_blocks.4.attn.add_q_proj.weight quantizer +25-08-28 16:31:25 | D | - Calibrating transformer_blocks.4.attn.add_k_proj.weight quantizer +25-08-28 16:31:25 | D | - Calibrating transformer_blocks.4.attn.add_v_proj.weight quantizer +25-08-28 16:31:25 | D | - Calibrating transformer_blocks.4.attn.to_out.0.weight quantizer +25-08-28 16:31:26 | D | - Calibrating transformer_blocks.4.attn.to_add_out.weight quantizer +25-08-28 16:31:26 | D | - Calibrating transformer_blocks.4.ff.net.0.proj.weight quantizer +25-08-28 16:31:26 | D | - Calibrating transformer_blocks.4.ff.net.2.linear.weight quantizer +25-08-28 16:31:26 | D | - Calibrating transformer_blocks.4.ff_context.net.0.proj.weight quantizer +25-08-28 16:31:26 | D | - Calibrating transformer_blocks.4.ff_context.net.2.linear.weight quantizer +25-08-28 16:31:27 | D | - Calibrating weights: block transformer_blocks.5 +25-08-28 16:31:27 | D | - Calibrating transformer_blocks.5.norm1.linear.weight quantizer +25-08-28 16:31:27 | D | - Calibrating transformer_blocks.5.norm1_context.linear.weight quantizer +25-08-28 16:31:27 | D | - Calibrating transformer_blocks.5.attn.to_q.weight quantizer +25-08-28 16:31:27 | D | - Calibrating transformer_blocks.5.attn.to_k.weight quantizer +25-08-28 16:31:27 | D | - Calibrating transformer_blocks.5.attn.to_v.weight quantizer +25-08-28 16:31:28 | D | - Calibrating transformer_blocks.5.attn.add_q_proj.weight quantizer +25-08-28 16:31:28 | D | - Calibrating transformer_blocks.5.attn.add_k_proj.weight quantizer +25-08-28 16:31:28 | D | - Calibrating transformer_blocks.5.attn.add_v_proj.weight quantizer +25-08-28 16:31:28 | D | - Calibrating transformer_blocks.5.attn.to_out.0.weight quantizer +25-08-28 16:31:28 | D | - Calibrating transformer_blocks.5.attn.to_add_out.weight quantizer +25-08-28 16:31:28 | D | - Calibrating transformer_blocks.5.ff.net.0.proj.weight quantizer +25-08-28 16:31:29 | D | - Calibrating transformer_blocks.5.ff.net.2.linear.weight quantizer +25-08-28 16:31:29 | D | - Calibrating transformer_blocks.5.ff_context.net.0.proj.weight quantizer +25-08-28 16:31:29 | D | - Calibrating transformer_blocks.5.ff_context.net.2.linear.weight quantizer +25-08-28 16:31:29 | D | - Calibrating weights: block transformer_blocks.6 +25-08-28 16:31:29 | D | - Calibrating transformer_blocks.6.norm1.linear.weight quantizer +25-08-28 16:31:29 | D | - Calibrating transformer_blocks.6.norm1_context.linear.weight quantizer +25-08-28 16:31:30 | D | - Calibrating transformer_blocks.6.attn.to_q.weight quantizer +25-08-28 16:31:30 | D | - Calibrating transformer_blocks.6.attn.to_k.weight quantizer +25-08-28 16:31:30 | D | - Calibrating transformer_blocks.6.attn.to_v.weight quantizer +25-08-28 16:31:30 | D | - Calibrating transformer_blocks.6.attn.add_q_proj.weight quantizer +25-08-28 16:31:30 | D | - Calibrating transformer_blocks.6.attn.add_k_proj.weight quantizer +25-08-28 16:31:31 | D | - Calibrating transformer_blocks.6.attn.add_v_proj.weight quantizer +25-08-28 16:31:31 | D | - Calibrating transformer_blocks.6.attn.to_out.0.weight quantizer +25-08-28 16:31:31 | D | - Calibrating transformer_blocks.6.attn.to_add_out.weight quantizer +25-08-28 16:31:31 | D | - Calibrating transformer_blocks.6.ff.net.0.proj.weight quantizer +25-08-28 16:31:31 | D | - Calibrating transformer_blocks.6.ff.net.2.linear.weight quantizer +25-08-28 16:31:31 | D | - Calibrating transformer_blocks.6.ff_context.net.0.proj.weight quantizer +25-08-28 16:31:32 | D | - Calibrating transformer_blocks.6.ff_context.net.2.linear.weight quantizer +25-08-28 16:31:32 | D | - Calibrating weights: block transformer_blocks.7 +25-08-28 16:31:32 | D | - Calibrating transformer_blocks.7.norm1.linear.weight quantizer +25-08-28 16:31:32 | D | - Calibrating transformer_blocks.7.norm1_context.linear.weight quantizer +25-08-28 16:31:32 | D | - Calibrating transformer_blocks.7.attn.to_q.weight quantizer +25-08-28 16:31:32 | D | - Calibrating transformer_blocks.7.attn.to_k.weight quantizer +25-08-28 16:31:33 | D | - Calibrating transformer_blocks.7.attn.to_v.weight quantizer +25-08-28 16:31:33 | D | - Calibrating transformer_blocks.7.attn.add_q_proj.weight quantizer +25-08-28 16:31:33 | D | - Calibrating transformer_blocks.7.attn.add_k_proj.weight quantizer +25-08-28 16:31:33 | D | - Calibrating transformer_blocks.7.attn.add_v_proj.weight quantizer +25-08-28 16:31:33 | D | - Calibrating transformer_blocks.7.attn.to_out.0.weight quantizer +25-08-28 16:31:34 | D | - Calibrating transformer_blocks.7.attn.to_add_out.weight quantizer +25-08-28 16:31:34 | D | - Calibrating transformer_blocks.7.ff.net.0.proj.weight quantizer +25-08-28 16:31:34 | D | - Calibrating transformer_blocks.7.ff.net.2.linear.weight quantizer +25-08-28 16:31:34 | D | - Calibrating transformer_blocks.7.ff_context.net.0.proj.weight quantizer +25-08-28 16:31:34 | D | - Calibrating transformer_blocks.7.ff_context.net.2.linear.weight quantizer +25-08-28 16:31:34 | D | - Calibrating weights: block transformer_blocks.8 +25-08-28 16:31:34 | D | - Calibrating transformer_blocks.8.norm1.linear.weight quantizer +25-08-28 16:31:35 | D | - Calibrating transformer_blocks.8.norm1_context.linear.weight quantizer +25-08-28 16:31:35 | D | - Calibrating transformer_blocks.8.attn.to_q.weight quantizer +25-08-28 16:31:35 | D | - Calibrating transformer_blocks.8.attn.to_k.weight quantizer +25-08-28 16:31:35 | D | - Calibrating transformer_blocks.8.attn.to_v.weight quantizer +25-08-28 16:31:35 | D | - Calibrating transformer_blocks.8.attn.add_q_proj.weight quantizer +25-08-28 16:31:36 | D | - Calibrating transformer_blocks.8.attn.add_k_proj.weight quantizer +25-08-28 16:31:36 | D | - Calibrating transformer_blocks.8.attn.add_v_proj.weight quantizer +25-08-28 16:31:36 | D | - Calibrating transformer_blocks.8.attn.to_out.0.weight quantizer +25-08-28 16:31:36 | D | - Calibrating transformer_blocks.8.attn.to_add_out.weight quantizer +25-08-28 16:31:36 | D | - Calibrating transformer_blocks.8.ff.net.0.proj.weight quantizer +25-08-28 16:31:36 | D | - Calibrating transformer_blocks.8.ff.net.2.linear.weight quantizer +25-08-28 16:31:37 | D | - Calibrating transformer_blocks.8.ff_context.net.0.proj.weight quantizer +25-08-28 16:31:37 | D | - Calibrating transformer_blocks.8.ff_context.net.2.linear.weight quantizer +25-08-28 16:31:37 | D | - Calibrating weights: block transformer_blocks.9 +25-08-28 16:31:37 | D | - Calibrating transformer_blocks.9.norm1.linear.weight quantizer +25-08-28 16:31:37 | D | - Calibrating transformer_blocks.9.norm1_context.linear.weight quantizer +25-08-28 16:31:37 | D | - Calibrating transformer_blocks.9.attn.to_q.weight quantizer +25-08-28 16:31:38 | D | - Calibrating transformer_blocks.9.attn.to_k.weight quantizer +25-08-28 16:31:38 | D | - Calibrating transformer_blocks.9.attn.to_v.weight quantizer +25-08-28 16:31:38 | D | - Calibrating transformer_blocks.9.attn.add_q_proj.weight quantizer +25-08-28 16:31:38 | D | - Calibrating transformer_blocks.9.attn.add_k_proj.weight quantizer +25-08-28 16:31:38 | D | - Calibrating transformer_blocks.9.attn.add_v_proj.weight quantizer +25-08-28 16:31:39 | D | - Calibrating transformer_blocks.9.attn.to_out.0.weight quantizer +25-08-28 16:31:39 | D | - Calibrating transformer_blocks.9.attn.to_add_out.weight quantizer +25-08-28 16:31:39 | D | - Calibrating transformer_blocks.9.ff.net.0.proj.weight quantizer +25-08-28 16:31:39 | D | - Calibrating transformer_blocks.9.ff.net.2.linear.weight quantizer +25-08-28 16:31:39 | D | - Calibrating transformer_blocks.9.ff_context.net.0.proj.weight quantizer +25-08-28 16:31:39 | D | - Calibrating transformer_blocks.9.ff_context.net.2.linear.weight quantizer +25-08-28 16:31:40 | D | - Calibrating weights: block transformer_blocks.10 +25-08-28 16:31:40 | D | - Calibrating transformer_blocks.10.norm1.linear.weight quantizer +25-08-28 16:31:40 | D | - Calibrating transformer_blocks.10.norm1_context.linear.weight quantizer +25-08-28 16:31:40 | D | - Calibrating transformer_blocks.10.attn.to_q.weight quantizer +25-08-28 16:31:40 | D | - Calibrating transformer_blocks.10.attn.to_k.weight quantizer +25-08-28 16:31:40 | D | - Calibrating transformer_blocks.10.attn.to_v.weight quantizer +25-08-28 16:31:41 | D | - Calibrating transformer_blocks.10.attn.add_q_proj.weight quantizer +25-08-28 16:31:41 | D | - Calibrating transformer_blocks.10.attn.add_k_proj.weight quantizer +25-08-28 16:31:41 | D | - Calibrating transformer_blocks.10.attn.add_v_proj.weight quantizer +25-08-28 16:31:41 | D | - Calibrating transformer_blocks.10.attn.to_out.0.weight quantizer +25-08-28 16:31:41 | D | - Calibrating transformer_blocks.10.attn.to_add_out.weight quantizer +25-08-28 16:31:42 | D | - Calibrating transformer_blocks.10.ff.net.0.proj.weight quantizer +25-08-28 16:31:42 | D | - Calibrating transformer_blocks.10.ff.net.2.linear.weight quantizer +25-08-28 16:31:42 | D | - Calibrating transformer_blocks.10.ff_context.net.0.proj.weight quantizer +25-08-28 16:31:42 | D | - Calibrating transformer_blocks.10.ff_context.net.2.linear.weight quantizer +25-08-28 16:31:42 | D | - Calibrating weights: block transformer_blocks.11 +25-08-28 16:31:42 | D | - Calibrating transformer_blocks.11.norm1.linear.weight quantizer +25-08-28 16:31:42 | D | - Calibrating transformer_blocks.11.norm1_context.linear.weight quantizer +25-08-28 16:31:43 | D | - Calibrating transformer_blocks.11.attn.to_q.weight quantizer +25-08-28 16:31:43 | D | - Calibrating transformer_blocks.11.attn.to_k.weight quantizer +25-08-28 16:31:43 | D | - Calibrating transformer_blocks.11.attn.to_v.weight quantizer +25-08-28 16:31:43 | D | - Calibrating transformer_blocks.11.attn.add_q_proj.weight quantizer +25-08-28 16:31:43 | D | - Calibrating transformer_blocks.11.attn.add_k_proj.weight quantizer +25-08-28 16:31:44 | D | - Calibrating transformer_blocks.11.attn.add_v_proj.weight quantizer +25-08-28 16:31:44 | D | - Calibrating transformer_blocks.11.attn.to_out.0.weight quantizer +25-08-28 16:31:44 | D | - Calibrating transformer_blocks.11.attn.to_add_out.weight quantizer +25-08-28 16:31:44 | D | - Calibrating transformer_blocks.11.ff.net.0.proj.weight quantizer +25-08-28 16:31:44 | D | - Calibrating transformer_blocks.11.ff.net.2.linear.weight quantizer +25-08-28 16:31:45 | D | - Calibrating transformer_blocks.11.ff_context.net.0.proj.weight quantizer +25-08-28 16:31:45 | D | - Calibrating transformer_blocks.11.ff_context.net.2.linear.weight quantizer +25-08-28 16:31:45 | D | - Calibrating weights: block transformer_blocks.12 +25-08-28 16:31:45 | D | - Calibrating transformer_blocks.12.norm1.linear.weight quantizer +25-08-28 16:31:45 | D | - Calibrating transformer_blocks.12.norm1_context.linear.weight quantizer +25-08-28 16:31:45 | D | - Calibrating transformer_blocks.12.attn.to_q.weight quantizer +25-08-28 16:31:45 | D | - Calibrating transformer_blocks.12.attn.to_k.weight quantizer +25-08-28 16:31:46 | D | - Calibrating transformer_blocks.12.attn.to_v.weight quantizer +25-08-28 16:31:46 | D | - Calibrating transformer_blocks.12.attn.add_q_proj.weight quantizer +25-08-28 16:31:46 | D | - Calibrating transformer_blocks.12.attn.add_k_proj.weight quantizer +25-08-28 16:31:46 | D | - Calibrating transformer_blocks.12.attn.add_v_proj.weight quantizer +25-08-28 16:31:46 | D | - Calibrating transformer_blocks.12.attn.to_out.0.weight quantizer +25-08-28 16:31:47 | D | - Calibrating transformer_blocks.12.attn.to_add_out.weight quantizer +25-08-28 16:31:47 | D | - Calibrating transformer_blocks.12.ff.net.0.proj.weight quantizer +25-08-28 16:31:47 | D | - Calibrating transformer_blocks.12.ff.net.2.linear.weight quantizer +25-08-28 16:31:47 | D | - Calibrating transformer_blocks.12.ff_context.net.0.proj.weight quantizer +25-08-28 16:31:47 | D | - Calibrating transformer_blocks.12.ff_context.net.2.linear.weight quantizer +25-08-28 16:31:48 | D | - Calibrating weights: block transformer_blocks.13 +25-08-28 16:31:48 | D | - Calibrating transformer_blocks.13.norm1.linear.weight quantizer +25-08-28 16:31:48 | D | - Calibrating transformer_blocks.13.norm1_context.linear.weight quantizer +25-08-28 16:31:48 | D | - Calibrating transformer_blocks.13.attn.to_q.weight quantizer +25-08-28 16:31:48 | D | - Calibrating transformer_blocks.13.attn.to_k.weight quantizer +25-08-28 16:31:48 | D | - Calibrating transformer_blocks.13.attn.to_v.weight quantizer +25-08-28 16:31:48 | D | - Calibrating transformer_blocks.13.attn.add_q_proj.weight quantizer +25-08-28 16:31:49 | D | - Calibrating transformer_blocks.13.attn.add_k_proj.weight quantizer +25-08-28 16:31:49 | D | - Calibrating transformer_blocks.13.attn.add_v_proj.weight quantizer +25-08-28 16:31:49 | D | - Calibrating transformer_blocks.13.attn.to_out.0.weight quantizer +25-08-28 16:31:49 | D | - Calibrating transformer_blocks.13.attn.to_add_out.weight quantizer +25-08-28 16:31:49 | D | - Calibrating transformer_blocks.13.ff.net.0.proj.weight quantizer +25-08-28 16:31:50 | D | - Calibrating transformer_blocks.13.ff.net.2.linear.weight quantizer +25-08-28 16:31:50 | D | - Calibrating transformer_blocks.13.ff_context.net.0.proj.weight quantizer +25-08-28 16:31:50 | D | - Calibrating transformer_blocks.13.ff_context.net.2.linear.weight quantizer +25-08-28 16:31:50 | D | - Calibrating weights: block transformer_blocks.14 +25-08-28 16:31:50 | D | - Calibrating transformer_blocks.14.norm1.linear.weight quantizer +25-08-28 16:31:50 | D | - Calibrating transformer_blocks.14.norm1_context.linear.weight quantizer +25-08-28 16:31:50 | D | - Calibrating transformer_blocks.14.attn.to_q.weight quantizer +25-08-28 16:31:51 | D | - Calibrating transformer_blocks.14.attn.to_k.weight quantizer +25-08-28 16:31:51 | D | - Calibrating transformer_blocks.14.attn.to_v.weight quantizer +25-08-28 16:31:51 | D | - Calibrating transformer_blocks.14.attn.add_q_proj.weight quantizer +25-08-28 16:31:51 | D | - Calibrating transformer_blocks.14.attn.add_k_proj.weight quantizer +25-08-28 16:31:52 | D | - Calibrating transformer_blocks.14.attn.add_v_proj.weight quantizer +25-08-28 16:31:52 | D | - Calibrating transformer_blocks.14.attn.to_out.0.weight quantizer +25-08-28 16:31:52 | D | - Calibrating transformer_blocks.14.attn.to_add_out.weight quantizer +25-08-28 16:31:52 | D | - Calibrating transformer_blocks.14.ff.net.0.proj.weight quantizer +25-08-28 16:31:52 | D | - Calibrating transformer_blocks.14.ff.net.2.linear.weight quantizer +25-08-28 16:31:52 | D | - Calibrating transformer_blocks.14.ff_context.net.0.proj.weight quantizer +25-08-28 16:31:53 | D | - Calibrating transformer_blocks.14.ff_context.net.2.linear.weight quantizer +25-08-28 16:31:53 | D | - Calibrating weights: block transformer_blocks.15 +25-08-28 16:31:53 | D | - Calibrating transformer_blocks.15.norm1.linear.weight quantizer +25-08-28 16:31:53 | D | - Calibrating transformer_blocks.15.norm1_context.linear.weight quantizer +25-08-28 16:31:53 | D | - Calibrating transformer_blocks.15.attn.to_q.weight quantizer +25-08-28 16:31:53 | D | - Calibrating transformer_blocks.15.attn.to_k.weight quantizer +25-08-28 16:31:54 | D | - Calibrating transformer_blocks.15.attn.to_v.weight quantizer +25-08-28 16:31:54 | D | - Calibrating transformer_blocks.15.attn.add_q_proj.weight quantizer +25-08-28 16:31:54 | D | - Calibrating transformer_blocks.15.attn.add_k_proj.weight quantizer +25-08-28 16:31:54 | D | - Calibrating transformer_blocks.15.attn.add_v_proj.weight quantizer +25-08-28 16:31:54 | D | - Calibrating transformer_blocks.15.attn.to_out.0.weight quantizer +25-08-28 16:31:54 | D | - Calibrating transformer_blocks.15.attn.to_add_out.weight quantizer +25-08-28 16:31:55 | D | - Calibrating transformer_blocks.15.ff.net.0.proj.weight quantizer +25-08-28 16:31:55 | D | - Calibrating transformer_blocks.15.ff.net.2.linear.weight quantizer +25-08-28 16:31:55 | D | - Calibrating transformer_blocks.15.ff_context.net.0.proj.weight quantizer +25-08-28 16:31:55 | D | - Calibrating transformer_blocks.15.ff_context.net.2.linear.weight quantizer +25-08-28 16:31:55 | D | - Calibrating weights: block transformer_blocks.16 +25-08-28 16:31:55 | D | - Calibrating transformer_blocks.16.norm1.linear.weight quantizer +25-08-28 16:31:56 | D | - Calibrating transformer_blocks.16.norm1_context.linear.weight quantizer +25-08-28 16:31:56 | D | - Calibrating transformer_blocks.16.attn.to_q.weight quantizer +25-08-28 16:31:56 | D | - Calibrating transformer_blocks.16.attn.to_k.weight quantizer +25-08-28 16:31:56 | D | - Calibrating transformer_blocks.16.attn.to_v.weight quantizer +25-08-28 16:31:56 | D | - Calibrating transformer_blocks.16.attn.add_q_proj.weight quantizer +25-08-28 16:31:57 | D | - Calibrating transformer_blocks.16.attn.add_k_proj.weight quantizer +25-08-28 16:31:57 | D | - Calibrating transformer_blocks.16.attn.add_v_proj.weight quantizer +25-08-28 16:31:57 | D | - Calibrating transformer_blocks.16.attn.to_out.0.weight quantizer +25-08-28 16:31:57 | D | - Calibrating transformer_blocks.16.attn.to_add_out.weight quantizer +25-08-28 16:31:57 | D | - Calibrating transformer_blocks.16.ff.net.0.proj.weight quantizer +25-08-28 16:31:57 | D | - Calibrating transformer_blocks.16.ff.net.2.linear.weight quantizer +25-08-28 16:31:58 | D | - Calibrating transformer_blocks.16.ff_context.net.0.proj.weight quantizer +25-08-28 16:31:58 | D | - Calibrating transformer_blocks.16.ff_context.net.2.linear.weight quantizer +25-08-28 16:31:58 | D | - Calibrating weights: block transformer_blocks.17 +25-08-28 16:31:58 | D | - Calibrating transformer_blocks.17.norm1.linear.weight quantizer +25-08-28 16:31:58 | D | - Calibrating transformer_blocks.17.norm1_context.linear.weight quantizer +25-08-28 16:31:58 | D | - Calibrating transformer_blocks.17.attn.to_q.weight quantizer +25-08-28 16:31:59 | D | - Calibrating transformer_blocks.17.attn.to_k.weight quantizer +25-08-28 16:31:59 | D | - Calibrating transformer_blocks.17.attn.to_v.weight quantizer +25-08-28 16:31:59 | D | - Calibrating transformer_blocks.17.attn.add_q_proj.weight quantizer +25-08-28 16:31:59 | D | - Calibrating transformer_blocks.17.attn.add_k_proj.weight quantizer +25-08-28 16:31:59 | D | - Calibrating transformer_blocks.17.attn.add_v_proj.weight quantizer +25-08-28 16:32:00 | D | - Calibrating transformer_blocks.17.attn.to_out.0.weight quantizer +25-08-28 16:32:00 | D | - Calibrating transformer_blocks.17.attn.to_add_out.weight quantizer +25-08-28 16:32:00 | D | - Calibrating transformer_blocks.17.ff.net.0.proj.weight quantizer +25-08-28 16:32:00 | D | - Calibrating transformer_blocks.17.ff.net.2.linear.weight quantizer +25-08-28 16:32:00 | D | - Calibrating transformer_blocks.17.ff_context.net.0.proj.weight quantizer +25-08-28 16:32:00 | D | - Calibrating transformer_blocks.17.ff_context.net.2.linear.weight quantizer +25-08-28 16:32:01 | D | - Calibrating weights: block transformer_blocks.18 +25-08-28 16:32:01 | D | - Calibrating transformer_blocks.18.norm1.linear.weight quantizer +25-08-28 16:32:01 | D | - Calibrating transformer_blocks.18.norm1_context.linear.weight quantizer +25-08-28 16:32:01 | D | - Calibrating transformer_blocks.18.attn.to_q.weight quantizer +25-08-28 16:32:01 | D | - Calibrating transformer_blocks.18.attn.to_k.weight quantizer +25-08-28 16:32:01 | D | - Calibrating transformer_blocks.18.attn.to_v.weight quantizer +25-08-28 16:32:02 | D | - Calibrating transformer_blocks.18.attn.add_q_proj.weight quantizer +25-08-28 16:32:02 | D | - Calibrating transformer_blocks.18.attn.add_k_proj.weight quantizer +25-08-28 16:32:02 | D | - Calibrating transformer_blocks.18.attn.add_v_proj.weight quantizer +25-08-28 16:32:02 | D | - Calibrating transformer_blocks.18.attn.to_out.0.weight quantizer +25-08-28 16:32:02 | D | - Calibrating transformer_blocks.18.attn.to_add_out.weight quantizer +25-08-28 16:32:03 | D | - Calibrating transformer_blocks.18.ff.net.0.proj.weight quantizer +25-08-28 16:32:03 | D | - Calibrating transformer_blocks.18.ff.net.2.linear.weight quantizer +25-08-28 16:32:03 | D | - Calibrating transformer_blocks.18.ff_context.net.0.proj.weight quantizer +25-08-28 16:32:03 | D | - Calibrating transformer_blocks.18.ff_context.net.2.linear.weight quantizer +25-08-28 16:32:03 | D | - Calibrating weights: block single_transformer_blocks.0 +25-08-28 16:32:03 | D | - Calibrating single_transformer_blocks.0.norm.linear.weight quantizer +25-08-28 16:32:03 | D | - Calibrating single_transformer_blocks.0.attn.to_q.weight quantizer +25-08-28 16:32:04 | D | - Calibrating single_transformer_blocks.0.attn.to_k.weight quantizer +25-08-28 16:32:04 | D | - Calibrating single_transformer_blocks.0.attn.to_v.weight quantizer +25-08-28 16:32:04 | D | - Calibrating single_transformer_blocks.0.proj_out.linears.0.weight quantizer +25-08-28 16:32:04 | D | - Calibrating single_transformer_blocks.0.proj_mlp.weight quantizer +25-08-28 16:32:04 | D | - Calibrating single_transformer_blocks.0.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:05 | D | - Calibrating weights: block single_transformer_blocks.1 +25-08-28 16:32:05 | D | - Calibrating single_transformer_blocks.1.norm.linear.weight quantizer +25-08-28 16:32:05 | D | - Calibrating single_transformer_blocks.1.attn.to_q.weight quantizer +25-08-28 16:32:05 | D | - Calibrating single_transformer_blocks.1.attn.to_k.weight quantizer +25-08-28 16:32:05 | D | - Calibrating single_transformer_blocks.1.attn.to_v.weight quantizer +25-08-28 16:32:05 | D | - Calibrating single_transformer_blocks.1.proj_out.linears.0.weight quantizer +25-08-28 16:32:05 | D | - Calibrating single_transformer_blocks.1.proj_mlp.weight quantizer +25-08-28 16:32:06 | D | - Calibrating single_transformer_blocks.1.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:06 | D | - Calibrating weights: block single_transformer_blocks.2 +25-08-28 16:32:06 | D | - Calibrating single_transformer_blocks.2.norm.linear.weight quantizer +25-08-28 16:32:06 | D | - Calibrating single_transformer_blocks.2.attn.to_q.weight quantizer +25-08-28 16:32:06 | D | - Calibrating single_transformer_blocks.2.attn.to_k.weight quantizer +25-08-28 16:32:06 | D | - Calibrating single_transformer_blocks.2.attn.to_v.weight quantizer +25-08-28 16:32:07 | D | - Calibrating single_transformer_blocks.2.proj_out.linears.0.weight quantizer +25-08-28 16:32:07 | D | - Calibrating single_transformer_blocks.2.proj_mlp.weight quantizer +25-08-28 16:32:07 | D | - Calibrating single_transformer_blocks.2.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:07 | D | - Calibrating weights: block single_transformer_blocks.3 +25-08-28 16:32:07 | D | - Calibrating single_transformer_blocks.3.norm.linear.weight quantizer +25-08-28 16:32:07 | D | - Calibrating single_transformer_blocks.3.attn.to_q.weight quantizer +25-08-28 16:32:08 | D | - Calibrating single_transformer_blocks.3.attn.to_k.weight quantizer +25-08-28 16:32:08 | D | - Calibrating single_transformer_blocks.3.attn.to_v.weight quantizer +25-08-28 16:32:08 | D | - Calibrating single_transformer_blocks.3.proj_out.linears.0.weight quantizer +25-08-28 16:32:08 | D | - Calibrating single_transformer_blocks.3.proj_mlp.weight quantizer +25-08-28 16:32:08 | D | - Calibrating single_transformer_blocks.3.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:08 | D | - Calibrating weights: block single_transformer_blocks.4 +25-08-28 16:32:08 | D | - Calibrating single_transformer_blocks.4.norm.linear.weight quantizer +25-08-28 16:32:09 | D | - Calibrating single_transformer_blocks.4.attn.to_q.weight quantizer +25-08-28 16:32:09 | D | - Calibrating single_transformer_blocks.4.attn.to_k.weight quantizer +25-08-28 16:32:09 | D | - Calibrating single_transformer_blocks.4.attn.to_v.weight quantizer +25-08-28 16:32:09 | D | - Calibrating single_transformer_blocks.4.proj_out.linears.0.weight quantizer +25-08-28 16:32:09 | D | - Calibrating single_transformer_blocks.4.proj_mlp.weight quantizer +25-08-28 16:32:10 | D | - Calibrating single_transformer_blocks.4.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:10 | D | - Calibrating weights: block single_transformer_blocks.5 +25-08-28 16:32:10 | D | - Calibrating single_transformer_blocks.5.norm.linear.weight quantizer +25-08-28 16:32:10 | D | - Calibrating single_transformer_blocks.5.attn.to_q.weight quantizer +25-08-28 16:32:10 | D | - Calibrating single_transformer_blocks.5.attn.to_k.weight quantizer +25-08-28 16:32:10 | D | - Calibrating single_transformer_blocks.5.attn.to_v.weight quantizer +25-08-28 16:32:11 | D | - Calibrating single_transformer_blocks.5.proj_out.linears.0.weight quantizer +25-08-28 16:32:11 | D | - Calibrating single_transformer_blocks.5.proj_mlp.weight quantizer +25-08-28 16:32:11 | D | - Calibrating single_transformer_blocks.5.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:11 | D | - Calibrating weights: block single_transformer_blocks.6 +25-08-28 16:32:11 | D | - Calibrating single_transformer_blocks.6.norm.linear.weight quantizer +25-08-28 16:32:11 | D | - Calibrating single_transformer_blocks.6.attn.to_q.weight quantizer +25-08-28 16:32:11 | D | - Calibrating single_transformer_blocks.6.attn.to_k.weight quantizer +25-08-28 16:32:12 | D | - Calibrating single_transformer_blocks.6.attn.to_v.weight quantizer +25-08-28 16:32:12 | D | - Calibrating single_transformer_blocks.6.proj_out.linears.0.weight quantizer +25-08-28 16:32:12 | D | - Calibrating single_transformer_blocks.6.proj_mlp.weight quantizer +25-08-28 16:32:12 | D | - Calibrating single_transformer_blocks.6.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:12 | D | - Calibrating weights: block single_transformer_blocks.7 +25-08-28 16:32:12 | D | - Calibrating single_transformer_blocks.7.norm.linear.weight quantizer +25-08-28 16:32:13 | D | - Calibrating single_transformer_blocks.7.attn.to_q.weight quantizer +25-08-28 16:32:13 | D | - Calibrating single_transformer_blocks.7.attn.to_k.weight quantizer +25-08-28 16:32:13 | D | - Calibrating single_transformer_blocks.7.attn.to_v.weight quantizer +25-08-28 16:32:13 | D | - Calibrating single_transformer_blocks.7.proj_out.linears.0.weight quantizer +25-08-28 16:32:13 | D | - Calibrating single_transformer_blocks.7.proj_mlp.weight quantizer +25-08-28 16:32:14 | D | - Calibrating single_transformer_blocks.7.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:14 | D | - Calibrating weights: block single_transformer_blocks.8 +25-08-28 16:32:14 | D | - Calibrating single_transformer_blocks.8.norm.linear.weight quantizer +25-08-28 16:32:14 | D | - Calibrating single_transformer_blocks.8.attn.to_q.weight quantizer +25-08-28 16:32:14 | D | - Calibrating single_transformer_blocks.8.attn.to_k.weight quantizer +25-08-28 16:32:14 | D | - Calibrating single_transformer_blocks.8.attn.to_v.weight quantizer +25-08-28 16:32:14 | D | - Calibrating single_transformer_blocks.8.proj_out.linears.0.weight quantizer +25-08-28 16:32:15 | D | - Calibrating single_transformer_blocks.8.proj_mlp.weight quantizer +25-08-28 16:32:15 | D | - Calibrating single_transformer_blocks.8.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:15 | D | - Calibrating weights: block single_transformer_blocks.9 +25-08-28 16:32:15 | D | - Calibrating single_transformer_blocks.9.norm.linear.weight quantizer +25-08-28 16:32:15 | D | - Calibrating single_transformer_blocks.9.attn.to_q.weight quantizer +25-08-28 16:32:15 | D | - Calibrating single_transformer_blocks.9.attn.to_k.weight quantizer +25-08-28 16:32:16 | D | - Calibrating single_transformer_blocks.9.attn.to_v.weight quantizer +25-08-28 16:32:16 | D | - Calibrating single_transformer_blocks.9.proj_out.linears.0.weight quantizer +25-08-28 16:32:16 | D | - Calibrating single_transformer_blocks.9.proj_mlp.weight quantizer +25-08-28 16:32:16 | D | - Calibrating single_transformer_blocks.9.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:16 | D | - Calibrating weights: block single_transformer_blocks.10 +25-08-28 16:32:16 | D | - Calibrating single_transformer_blocks.10.norm.linear.weight quantizer +25-08-28 16:32:17 | D | - Calibrating single_transformer_blocks.10.attn.to_q.weight quantizer +25-08-28 16:32:17 | D | - Calibrating single_transformer_blocks.10.attn.to_k.weight quantizer +25-08-28 16:32:17 | D | - Calibrating single_transformer_blocks.10.attn.to_v.weight quantizer +25-08-28 16:32:17 | D | - Calibrating single_transformer_blocks.10.proj_out.linears.0.weight quantizer +25-08-28 16:32:17 | D | - Calibrating single_transformer_blocks.10.proj_mlp.weight quantizer +25-08-28 16:32:17 | D | - Calibrating single_transformer_blocks.10.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:18 | D | - Calibrating weights: block single_transformer_blocks.11 +25-08-28 16:32:18 | D | - Calibrating single_transformer_blocks.11.norm.linear.weight quantizer +25-08-28 16:32:18 | D | - Calibrating single_transformer_blocks.11.attn.to_q.weight quantizer +25-08-28 16:32:18 | D | - Calibrating single_transformer_blocks.11.attn.to_k.weight quantizer +25-08-28 16:32:18 | D | - Calibrating single_transformer_blocks.11.attn.to_v.weight quantizer +25-08-28 16:32:18 | D | - Calibrating single_transformer_blocks.11.proj_out.linears.0.weight quantizer +25-08-28 16:32:19 | D | - Calibrating single_transformer_blocks.11.proj_mlp.weight quantizer +25-08-28 16:32:19 | D | - Calibrating single_transformer_blocks.11.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:19 | D | - Calibrating weights: block single_transformer_blocks.12 +25-08-28 16:32:19 | D | - Calibrating single_transformer_blocks.12.norm.linear.weight quantizer +25-08-28 16:32:19 | D | - Calibrating single_transformer_blocks.12.attn.to_q.weight quantizer +25-08-28 16:32:19 | D | - Calibrating single_transformer_blocks.12.attn.to_k.weight quantizer +25-08-28 16:32:20 | D | - Calibrating single_transformer_blocks.12.attn.to_v.weight quantizer +25-08-28 16:32:20 | D | - Calibrating single_transformer_blocks.12.proj_out.linears.0.weight quantizer +25-08-28 16:32:20 | D | - Calibrating single_transformer_blocks.12.proj_mlp.weight quantizer +25-08-28 16:32:20 | D | - Calibrating single_transformer_blocks.12.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:20 | D | - Calibrating weights: block single_transformer_blocks.13 +25-08-28 16:32:20 | D | - Calibrating single_transformer_blocks.13.norm.linear.weight quantizer +25-08-28 16:32:20 | D | - Calibrating single_transformer_blocks.13.attn.to_q.weight quantizer +25-08-28 16:32:21 | D | - Calibrating single_transformer_blocks.13.attn.to_k.weight quantizer +25-08-28 16:32:21 | D | - Calibrating single_transformer_blocks.13.attn.to_v.weight quantizer +25-08-28 16:32:21 | D | - Calibrating single_transformer_blocks.13.proj_out.linears.0.weight quantizer +25-08-28 16:32:21 | D | - Calibrating single_transformer_blocks.13.proj_mlp.weight quantizer +25-08-28 16:32:21 | D | - Calibrating single_transformer_blocks.13.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:22 | D | - Calibrating weights: block single_transformer_blocks.14 +25-08-28 16:32:22 | D | - Calibrating single_transformer_blocks.14.norm.linear.weight quantizer +25-08-28 16:32:22 | D | - Calibrating single_transformer_blocks.14.attn.to_q.weight quantizer +25-08-28 16:32:22 | D | - Calibrating single_transformer_blocks.14.attn.to_k.weight quantizer +25-08-28 16:32:22 | D | - Calibrating single_transformer_blocks.14.attn.to_v.weight quantizer +25-08-28 16:32:22 | D | - Calibrating single_transformer_blocks.14.proj_out.linears.0.weight quantizer +25-08-28 16:32:23 | D | - Calibrating single_transformer_blocks.14.proj_mlp.weight quantizer +25-08-28 16:32:23 | D | - Calibrating single_transformer_blocks.14.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:23 | D | - Calibrating weights: block single_transformer_blocks.15 +25-08-28 16:32:23 | D | - Calibrating single_transformer_blocks.15.norm.linear.weight quantizer +25-08-28 16:32:23 | D | - Calibrating single_transformer_blocks.15.attn.to_q.weight quantizer +25-08-28 16:32:23 | D | - Calibrating single_transformer_blocks.15.attn.to_k.weight quantizer +25-08-28 16:32:23 | D | - Calibrating single_transformer_blocks.15.attn.to_v.weight quantizer +25-08-28 16:32:24 | D | - Calibrating single_transformer_blocks.15.proj_out.linears.0.weight quantizer +25-08-28 16:32:24 | D | - Calibrating single_transformer_blocks.15.proj_mlp.weight quantizer +25-08-28 16:32:24 | D | - Calibrating single_transformer_blocks.15.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:24 | D | - Calibrating weights: block single_transformer_blocks.16 +25-08-28 16:32:24 | D | - Calibrating single_transformer_blocks.16.norm.linear.weight quantizer +25-08-28 16:32:24 | D | - Calibrating single_transformer_blocks.16.attn.to_q.weight quantizer +25-08-28 16:32:25 | D | - Calibrating single_transformer_blocks.16.attn.to_k.weight quantizer +25-08-28 16:32:25 | D | - Calibrating single_transformer_blocks.16.attn.to_v.weight quantizer +25-08-28 16:32:25 | D | - Calibrating single_transformer_blocks.16.proj_out.linears.0.weight quantizer +25-08-28 16:32:25 | D | - Calibrating single_transformer_blocks.16.proj_mlp.weight quantizer +25-08-28 16:32:25 | D | - Calibrating single_transformer_blocks.16.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:25 | D | - Calibrating weights: block single_transformer_blocks.17 +25-08-28 16:32:25 | D | - Calibrating single_transformer_blocks.17.norm.linear.weight quantizer +25-08-28 16:32:26 | D | - Calibrating single_transformer_blocks.17.attn.to_q.weight quantizer +25-08-28 16:32:26 | D | - Calibrating single_transformer_blocks.17.attn.to_k.weight quantizer +25-08-28 16:32:26 | D | - Calibrating single_transformer_blocks.17.attn.to_v.weight quantizer +25-08-28 16:32:26 | D | - Calibrating single_transformer_blocks.17.proj_out.linears.0.weight quantizer +25-08-28 16:32:26 | D | - Calibrating single_transformer_blocks.17.proj_mlp.weight quantizer +25-08-28 16:32:27 | D | - Calibrating single_transformer_blocks.17.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:27 | D | - Calibrating weights: block single_transformer_blocks.18 +25-08-28 16:32:27 | D | - Calibrating single_transformer_blocks.18.norm.linear.weight quantizer +25-08-28 16:32:27 | D | - Calibrating single_transformer_blocks.18.attn.to_q.weight quantizer +25-08-28 16:32:27 | D | - Calibrating single_transformer_blocks.18.attn.to_k.weight quantizer +25-08-28 16:32:27 | D | - Calibrating single_transformer_blocks.18.attn.to_v.weight quantizer +25-08-28 16:32:28 | D | - Calibrating single_transformer_blocks.18.proj_out.linears.0.weight quantizer +25-08-28 16:32:28 | D | - Calibrating single_transformer_blocks.18.proj_mlp.weight quantizer +25-08-28 16:32:28 | D | - Calibrating single_transformer_blocks.18.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:28 | D | - Calibrating weights: block single_transformer_blocks.19 +25-08-28 16:32:28 | D | - Calibrating single_transformer_blocks.19.norm.linear.weight quantizer +25-08-28 16:32:28 | D | - Calibrating single_transformer_blocks.19.attn.to_q.weight quantizer +25-08-28 16:32:29 | D | - Calibrating single_transformer_blocks.19.attn.to_k.weight quantizer +25-08-28 16:32:29 | D | - Calibrating single_transformer_blocks.19.attn.to_v.weight quantizer +25-08-28 16:32:29 | D | - Calibrating single_transformer_blocks.19.proj_out.linears.0.weight quantizer +25-08-28 16:32:29 | D | - Calibrating single_transformer_blocks.19.proj_mlp.weight quantizer +25-08-28 16:32:29 | D | - Calibrating single_transformer_blocks.19.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:29 | D | - Calibrating weights: block single_transformer_blocks.20 +25-08-28 16:32:29 | D | - Calibrating single_transformer_blocks.20.norm.linear.weight quantizer +25-08-28 16:32:30 | D | - Calibrating single_transformer_blocks.20.attn.to_q.weight quantizer +25-08-28 16:32:30 | D | - Calibrating single_transformer_blocks.20.attn.to_k.weight quantizer +25-08-28 16:32:30 | D | - Calibrating single_transformer_blocks.20.attn.to_v.weight quantizer +25-08-28 16:32:30 | D | - Calibrating single_transformer_blocks.20.proj_out.linears.0.weight quantizer +25-08-28 16:32:30 | D | - Calibrating single_transformer_blocks.20.proj_mlp.weight quantizer +25-08-28 16:32:31 | D | - Calibrating single_transformer_blocks.20.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:31 | D | - Calibrating weights: block single_transformer_blocks.21 +25-08-28 16:32:31 | D | - Calibrating single_transformer_blocks.21.norm.linear.weight quantizer +25-08-28 16:32:31 | D | - Calibrating single_transformer_blocks.21.attn.to_q.weight quantizer +25-08-28 16:32:31 | D | - Calibrating single_transformer_blocks.21.attn.to_k.weight quantizer +25-08-28 16:32:31 | D | - Calibrating single_transformer_blocks.21.attn.to_v.weight quantizer +25-08-28 16:32:31 | D | - Calibrating single_transformer_blocks.21.proj_out.linears.0.weight quantizer +25-08-28 16:32:32 | D | - Calibrating single_transformer_blocks.21.proj_mlp.weight quantizer +25-08-28 16:32:32 | D | - Calibrating single_transformer_blocks.21.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:32 | D | - Calibrating weights: block single_transformer_blocks.22 +25-08-28 16:32:32 | D | - Calibrating single_transformer_blocks.22.norm.linear.weight quantizer +25-08-28 16:32:32 | D | - Calibrating single_transformer_blocks.22.attn.to_q.weight quantizer +25-08-28 16:32:32 | D | - Calibrating single_transformer_blocks.22.attn.to_k.weight quantizer +25-08-28 16:32:33 | D | - Calibrating single_transformer_blocks.22.attn.to_v.weight quantizer +25-08-28 16:32:33 | D | - Calibrating single_transformer_blocks.22.proj_out.linears.0.weight quantizer +25-08-28 16:32:33 | D | - Calibrating single_transformer_blocks.22.proj_mlp.weight quantizer +25-08-28 16:32:33 | D | - Calibrating single_transformer_blocks.22.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:33 | D | - Calibrating weights: block single_transformer_blocks.23 +25-08-28 16:32:33 | D | - Calibrating single_transformer_blocks.23.norm.linear.weight quantizer +25-08-28 16:32:34 | D | - Calibrating single_transformer_blocks.23.attn.to_q.weight quantizer +25-08-28 16:32:34 | D | - Calibrating single_transformer_blocks.23.attn.to_k.weight quantizer +25-08-28 16:32:34 | D | - Calibrating single_transformer_blocks.23.attn.to_v.weight quantizer +25-08-28 16:32:34 | D | - Calibrating single_transformer_blocks.23.proj_out.linears.0.weight quantizer +25-08-28 16:32:34 | D | - Calibrating single_transformer_blocks.23.proj_mlp.weight quantizer +25-08-28 16:32:34 | D | - Calibrating single_transformer_blocks.23.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:35 | D | - Calibrating weights: block single_transformer_blocks.24 +25-08-28 16:32:35 | D | - Calibrating single_transformer_blocks.24.norm.linear.weight quantizer +25-08-28 16:32:35 | D | - Calibrating single_transformer_blocks.24.attn.to_q.weight quantizer +25-08-28 16:32:35 | D | - Calibrating single_transformer_blocks.24.attn.to_k.weight quantizer +25-08-28 16:32:35 | D | - Calibrating single_transformer_blocks.24.attn.to_v.weight quantizer +25-08-28 16:32:35 | D | - Calibrating single_transformer_blocks.24.proj_out.linears.0.weight quantizer +25-08-28 16:32:36 | D | - Calibrating single_transformer_blocks.24.proj_mlp.weight quantizer +25-08-28 16:32:36 | D | - Calibrating single_transformer_blocks.24.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:36 | D | - Calibrating weights: block single_transformer_blocks.25 +25-08-28 16:32:36 | D | - Calibrating single_transformer_blocks.25.norm.linear.weight quantizer +25-08-28 16:32:36 | D | - Calibrating single_transformer_blocks.25.attn.to_q.weight quantizer +25-08-28 16:32:36 | D | - Calibrating single_transformer_blocks.25.attn.to_k.weight quantizer +25-08-28 16:32:37 | D | - Calibrating single_transformer_blocks.25.attn.to_v.weight quantizer +25-08-28 16:32:37 | D | - Calibrating single_transformer_blocks.25.proj_out.linears.0.weight quantizer +25-08-28 16:32:37 | D | - Calibrating single_transformer_blocks.25.proj_mlp.weight quantizer +25-08-28 16:32:37 | D | - Calibrating single_transformer_blocks.25.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:37 | D | - Calibrating weights: block single_transformer_blocks.26 +25-08-28 16:32:37 | D | - Calibrating single_transformer_blocks.26.norm.linear.weight quantizer +25-08-28 16:32:37 | D | - Calibrating single_transformer_blocks.26.attn.to_q.weight quantizer +25-08-28 16:32:38 | D | - Calibrating single_transformer_blocks.26.attn.to_k.weight quantizer +25-08-28 16:32:38 | D | - Calibrating single_transformer_blocks.26.attn.to_v.weight quantizer +25-08-28 16:32:38 | D | - Calibrating single_transformer_blocks.26.proj_out.linears.0.weight quantizer +25-08-28 16:32:38 | D | - Calibrating single_transformer_blocks.26.proj_mlp.weight quantizer +25-08-28 16:32:38 | D | - Calibrating single_transformer_blocks.26.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:39 | D | - Calibrating weights: block single_transformer_blocks.27 +25-08-28 16:32:39 | D | - Calibrating single_transformer_blocks.27.norm.linear.weight quantizer +25-08-28 16:32:39 | D | - Calibrating single_transformer_blocks.27.attn.to_q.weight quantizer +25-08-28 16:32:39 | D | - Calibrating single_transformer_blocks.27.attn.to_k.weight quantizer +25-08-28 16:32:39 | D | - Calibrating single_transformer_blocks.27.attn.to_v.weight quantizer +25-08-28 16:32:39 | D | - Calibrating single_transformer_blocks.27.proj_out.linears.0.weight quantizer +25-08-28 16:32:40 | D | - Calibrating single_transformer_blocks.27.proj_mlp.weight quantizer +25-08-28 16:32:40 | D | - Calibrating single_transformer_blocks.27.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:40 | D | - Calibrating weights: block single_transformer_blocks.28 +25-08-28 16:32:40 | D | - Calibrating single_transformer_blocks.28.norm.linear.weight quantizer +25-08-28 16:32:40 | D | - Calibrating single_transformer_blocks.28.attn.to_q.weight quantizer +25-08-28 16:32:40 | D | - Calibrating single_transformer_blocks.28.attn.to_k.weight quantizer +25-08-28 16:32:40 | D | - Calibrating single_transformer_blocks.28.attn.to_v.weight quantizer +25-08-28 16:32:41 | D | - Calibrating single_transformer_blocks.28.proj_out.linears.0.weight quantizer +25-08-28 16:32:41 | D | - Calibrating single_transformer_blocks.28.proj_mlp.weight quantizer +25-08-28 16:32:41 | D | - Calibrating single_transformer_blocks.28.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:41 | D | - Calibrating weights: block single_transformer_blocks.29 +25-08-28 16:32:41 | D | - Calibrating single_transformer_blocks.29.norm.linear.weight quantizer +25-08-28 16:32:41 | D | - Calibrating single_transformer_blocks.29.attn.to_q.weight quantizer +25-08-28 16:32:42 | D | - Calibrating single_transformer_blocks.29.attn.to_k.weight quantizer +25-08-28 16:32:42 | D | - Calibrating single_transformer_blocks.29.attn.to_v.weight quantizer +25-08-28 16:32:42 | D | - Calibrating single_transformer_blocks.29.proj_out.linears.0.weight quantizer +25-08-28 16:32:42 | D | - Calibrating single_transformer_blocks.29.proj_mlp.weight quantizer +25-08-28 16:32:42 | D | - Calibrating single_transformer_blocks.29.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:43 | D | - Calibrating weights: block single_transformer_blocks.30 +25-08-28 16:32:43 | D | - Calibrating single_transformer_blocks.30.norm.linear.weight quantizer +25-08-28 16:32:43 | D | - Calibrating single_transformer_blocks.30.attn.to_q.weight quantizer +25-08-28 16:32:43 | D | - Calibrating single_transformer_blocks.30.attn.to_k.weight quantizer +25-08-28 16:32:43 | D | - Calibrating single_transformer_blocks.30.attn.to_v.weight quantizer +25-08-28 16:32:43 | D | - Calibrating single_transformer_blocks.30.proj_out.linears.0.weight quantizer +25-08-28 16:32:44 | D | - Calibrating single_transformer_blocks.30.proj_mlp.weight quantizer +25-08-28 16:32:44 | D | - Calibrating single_transformer_blocks.30.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:44 | D | - Calibrating weights: block single_transformer_blocks.31 +25-08-28 16:32:44 | D | - Calibrating single_transformer_blocks.31.norm.linear.weight quantizer +25-08-28 16:32:44 | D | - Calibrating single_transformer_blocks.31.attn.to_q.weight quantizer +25-08-28 16:32:44 | D | - Calibrating single_transformer_blocks.31.attn.to_k.weight quantizer +25-08-28 16:32:44 | D | - Calibrating single_transformer_blocks.31.attn.to_v.weight quantizer +25-08-28 16:32:45 | D | - Calibrating single_transformer_blocks.31.proj_out.linears.0.weight quantizer +25-08-28 16:32:45 | D | - Calibrating single_transformer_blocks.31.proj_mlp.weight quantizer +25-08-28 16:32:45 | D | - Calibrating single_transformer_blocks.31.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:45 | D | - Calibrating weights: block single_transformer_blocks.32 +25-08-28 16:32:45 | D | - Calibrating single_transformer_blocks.32.norm.linear.weight quantizer +25-08-28 16:32:45 | D | - Calibrating single_transformer_blocks.32.attn.to_q.weight quantizer +25-08-28 16:32:46 | D | - Calibrating single_transformer_blocks.32.attn.to_k.weight quantizer +25-08-28 16:32:46 | D | - Calibrating single_transformer_blocks.32.attn.to_v.weight quantizer +25-08-28 16:32:46 | D | - Calibrating single_transformer_blocks.32.proj_out.linears.0.weight quantizer +25-08-28 16:32:46 | D | - Calibrating single_transformer_blocks.32.proj_mlp.weight quantizer +25-08-28 16:32:46 | D | - Calibrating single_transformer_blocks.32.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:46 | D | - Calibrating weights: block single_transformer_blocks.33 +25-08-28 16:32:46 | D | - Calibrating single_transformer_blocks.33.norm.linear.weight quantizer +25-08-28 16:32:47 | D | - Calibrating single_transformer_blocks.33.attn.to_q.weight quantizer +25-08-28 16:32:47 | D | - Calibrating single_transformer_blocks.33.attn.to_k.weight quantizer +25-08-28 16:32:47 | D | - Calibrating single_transformer_blocks.33.attn.to_v.weight quantizer +25-08-28 16:32:47 | D | - Calibrating single_transformer_blocks.33.proj_out.linears.0.weight quantizer +25-08-28 16:32:47 | D | - Calibrating single_transformer_blocks.33.proj_mlp.weight quantizer +25-08-28 16:32:48 | D | - Calibrating single_transformer_blocks.33.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:48 | D | - Calibrating weights: block single_transformer_blocks.34 +25-08-28 16:32:48 | D | - Calibrating single_transformer_blocks.34.norm.linear.weight quantizer +25-08-28 16:32:48 | D | - Calibrating single_transformer_blocks.34.attn.to_q.weight quantizer +25-08-28 16:32:48 | D | - Calibrating single_transformer_blocks.34.attn.to_k.weight quantizer +25-08-28 16:32:48 | D | - Calibrating single_transformer_blocks.34.attn.to_v.weight quantizer +25-08-28 16:32:49 | D | - Calibrating single_transformer_blocks.34.proj_out.linears.0.weight quantizer +25-08-28 16:32:49 | D | - Calibrating single_transformer_blocks.34.proj_mlp.weight quantizer +25-08-28 16:32:49 | D | - Calibrating single_transformer_blocks.34.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:49 | D | - Calibrating weights: block single_transformer_blocks.35 +25-08-28 16:32:49 | D | - Calibrating single_transformer_blocks.35.norm.linear.weight quantizer +25-08-28 16:32:49 | D | - Calibrating single_transformer_blocks.35.attn.to_q.weight quantizer +25-08-28 16:32:49 | D | - Calibrating single_transformer_blocks.35.attn.to_k.weight quantizer +25-08-28 16:32:50 | D | - Calibrating single_transformer_blocks.35.attn.to_v.weight quantizer +25-08-28 16:32:50 | D | - Calibrating single_transformer_blocks.35.proj_out.linears.0.weight quantizer +25-08-28 16:32:50 | D | - Calibrating single_transformer_blocks.35.proj_mlp.weight quantizer +25-08-28 16:32:50 | D | - Calibrating single_transformer_blocks.35.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:50 | D | - Calibrating weights: block single_transformer_blocks.36 +25-08-28 16:32:50 | D | - Calibrating single_transformer_blocks.36.norm.linear.weight quantizer +25-08-28 16:32:51 | D | - Calibrating single_transformer_blocks.36.attn.to_q.weight quantizer +25-08-28 16:32:51 | D | - Calibrating single_transformer_blocks.36.attn.to_k.weight quantizer +25-08-28 16:32:51 | D | - Calibrating single_transformer_blocks.36.attn.to_v.weight quantizer +25-08-28 16:32:51 | D | - Calibrating single_transformer_blocks.36.proj_out.linears.0.weight quantizer +25-08-28 16:32:51 | D | - Calibrating single_transformer_blocks.36.proj_mlp.weight quantizer +25-08-28 16:32:51 | D | - Calibrating single_transformer_blocks.36.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:52 | D | - Calibrating weights: block single_transformer_blocks.37 +25-08-28 16:32:52 | D | - Calibrating single_transformer_blocks.37.norm.linear.weight quantizer +25-08-28 16:32:52 | D | - Calibrating single_transformer_blocks.37.attn.to_q.weight quantizer +25-08-28 16:32:52 | D | - Calibrating single_transformer_blocks.37.attn.to_k.weight quantizer +25-08-28 16:32:52 | D | - Calibrating single_transformer_blocks.37.attn.to_v.weight quantizer +25-08-28 16:32:52 | D | - Calibrating single_transformer_blocks.37.proj_out.linears.0.weight quantizer +25-08-28 16:32:53 | D | - Calibrating single_transformer_blocks.37.proj_mlp.weight quantizer +25-08-28 16:32:53 | D | - Calibrating single_transformer_blocks.37.proj_out.linears.1.linear.weight quantizer +25-08-28 16:32:53 | D | - Quantizing weights: block transformer_blocks.0 +25-08-28 16:32:53 | D | - Quantizing transformer_blocks.0.norm1.linear.weight +25-08-28 16:32:53 | D | + quant_dtype: sint4 +25-08-28 16:32:53 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:53 | D | + scale_dtype: (None,) +25-08-28 16:32:53 | D | - Quantizing transformer_blocks.0.norm1_context.linear.weight +25-08-28 16:32:53 | D | + quant_dtype: sint4 +25-08-28 16:32:53 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:53 | D | + scale_dtype: (None,) +25-08-28 16:32:53 | D | - Quantizing transformer_blocks.0.attn.to_q.weight +25-08-28 16:32:53 | D | + quant_dtype: sint4 +25-08-28 16:32:53 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:53 | D | + scale_dtype: (None,) +25-08-28 16:32:54 | D | - Quantizing transformer_blocks.0.attn.to_k.weight +25-08-28 16:32:54 | D | + quant_dtype: sint4 +25-08-28 16:32:54 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:54 | D | + scale_dtype: (None,) +25-08-28 16:32:54 | D | - Quantizing transformer_blocks.0.attn.to_v.weight +25-08-28 16:32:54 | D | + quant_dtype: sint4 +25-08-28 16:32:54 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:54 | D | + scale_dtype: (None,) +25-08-28 16:32:54 | D | - Quantizing transformer_blocks.0.attn.add_q_proj.weight +25-08-28 16:32:54 | D | + quant_dtype: sint4 +25-08-28 16:32:54 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:54 | D | + scale_dtype: (None,) +25-08-28 16:32:54 | D | - Quantizing transformer_blocks.0.attn.add_k_proj.weight +25-08-28 16:32:54 | D | + quant_dtype: sint4 +25-08-28 16:32:54 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:54 | D | + scale_dtype: (None,) +25-08-28 16:32:54 | D | - Quantizing transformer_blocks.0.attn.add_v_proj.weight +25-08-28 16:32:54 | D | + quant_dtype: sint4 +25-08-28 16:32:54 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:54 | D | + scale_dtype: (None,) +25-08-28 16:32:55 | D | - Quantizing transformer_blocks.0.attn.to_out.0.weight +25-08-28 16:32:55 | D | + quant_dtype: sint4 +25-08-28 16:32:55 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:55 | D | + scale_dtype: (None,) +25-08-28 16:32:55 | D | - Quantizing transformer_blocks.0.attn.to_add_out.weight +25-08-28 16:32:55 | D | + quant_dtype: sint4 +25-08-28 16:32:55 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:55 | D | + scale_dtype: (None,) +25-08-28 16:32:55 | D | - Quantizing transformer_blocks.0.ff.net.0.proj.weight +25-08-28 16:32:55 | D | + quant_dtype: sint4 +25-08-28 16:32:55 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:55 | D | + scale_dtype: (None,) +25-08-28 16:32:55 | D | - Quantizing transformer_blocks.0.ff.net.2.linear.weight +25-08-28 16:32:55 | D | + quant_dtype: sint4 +25-08-28 16:32:55 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:55 | D | + scale_dtype: (None,) +25-08-28 16:32:55 | D | - Quantizing transformer_blocks.0.ff_context.net.0.proj.weight +25-08-28 16:32:55 | D | + quant_dtype: sint4 +25-08-28 16:32:55 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:55 | D | + scale_dtype: (None,) +25-08-28 16:32:55 | D | - Quantizing transformer_blocks.0.ff_context.net.2.linear.weight +25-08-28 16:32:55 | D | + quant_dtype: sint4 +25-08-28 16:32:55 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:55 | D | + scale_dtype: (None,) +25-08-28 16:32:56 | D | - Quantizing weights: block transformer_blocks.1 +25-08-28 16:32:56 | D | - Quantizing transformer_blocks.1.norm1.linear.weight +25-08-28 16:32:56 | D | + quant_dtype: sint4 +25-08-28 16:32:56 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:56 | D | + scale_dtype: (None,) +25-08-28 16:32:56 | D | - Quantizing transformer_blocks.1.norm1_context.linear.weight +25-08-28 16:32:56 | D | + quant_dtype: sint4 +25-08-28 16:32:56 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:56 | D | + scale_dtype: (None,) +25-08-28 16:32:56 | D | - Quantizing transformer_blocks.1.attn.to_q.weight +25-08-28 16:32:56 | D | + quant_dtype: sint4 +25-08-28 16:32:56 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:56 | D | + scale_dtype: (None,) +25-08-28 16:32:56 | D | - Quantizing transformer_blocks.1.attn.to_k.weight +25-08-28 16:32:56 | D | + quant_dtype: sint4 +25-08-28 16:32:56 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:56 | D | + scale_dtype: (None,) +25-08-28 16:32:56 | D | - Quantizing transformer_blocks.1.attn.to_v.weight +25-08-28 16:32:56 | D | + quant_dtype: sint4 +25-08-28 16:32:56 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:56 | D | + scale_dtype: (None,) +25-08-28 16:32:57 | D | - Quantizing transformer_blocks.1.attn.add_q_proj.weight +25-08-28 16:32:57 | D | + quant_dtype: sint4 +25-08-28 16:32:57 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:57 | D | + scale_dtype: (None,) +25-08-28 16:32:57 | D | - Quantizing transformer_blocks.1.attn.add_k_proj.weight +25-08-28 16:32:57 | D | + quant_dtype: sint4 +25-08-28 16:32:57 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:57 | D | + scale_dtype: (None,) +25-08-28 16:32:57 | D | - Quantizing transformer_blocks.1.attn.add_v_proj.weight +25-08-28 16:32:57 | D | + quant_dtype: sint4 +25-08-28 16:32:57 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:57 | D | + scale_dtype: (None,) +25-08-28 16:32:57 | D | - Quantizing transformer_blocks.1.attn.to_out.0.weight +25-08-28 16:32:57 | D | + quant_dtype: sint4 +25-08-28 16:32:57 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:57 | D | + scale_dtype: (None,) +25-08-28 16:32:57 | D | - Quantizing transformer_blocks.1.attn.to_add_out.weight +25-08-28 16:32:57 | D | + quant_dtype: sint4 +25-08-28 16:32:57 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:57 | D | + scale_dtype: (None,) +25-08-28 16:32:58 | D | - Quantizing transformer_blocks.1.ff.net.0.proj.weight +25-08-28 16:32:58 | D | + quant_dtype: sint4 +25-08-28 16:32:58 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:58 | D | + scale_dtype: (None,) +25-08-28 16:32:58 | D | - Quantizing transformer_blocks.1.ff.net.2.linear.weight +25-08-28 16:32:58 | D | + quant_dtype: sint4 +25-08-28 16:32:58 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:58 | D | + scale_dtype: (None,) +25-08-28 16:32:58 | D | - Quantizing transformer_blocks.1.ff_context.net.0.proj.weight +25-08-28 16:32:58 | D | + quant_dtype: sint4 +25-08-28 16:32:58 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:58 | D | + scale_dtype: (None,) +25-08-28 16:32:58 | D | - Quantizing transformer_blocks.1.ff_context.net.2.linear.weight +25-08-28 16:32:58 | D | + quant_dtype: sint4 +25-08-28 16:32:58 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:58 | D | + scale_dtype: (None,) +25-08-28 16:32:58 | D | - Quantizing weights: block transformer_blocks.2 +25-08-28 16:32:58 | D | - Quantizing transformer_blocks.2.norm1.linear.weight +25-08-28 16:32:58 | D | + quant_dtype: sint4 +25-08-28 16:32:58 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:58 | D | + scale_dtype: (None,) +25-08-28 16:32:58 | D | - Quantizing transformer_blocks.2.norm1_context.linear.weight +25-08-28 16:32:58 | D | + quant_dtype: sint4 +25-08-28 16:32:58 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:58 | D | + scale_dtype: (None,) +25-08-28 16:32:59 | D | - Quantizing transformer_blocks.2.attn.to_q.weight +25-08-28 16:32:59 | D | + quant_dtype: sint4 +25-08-28 16:32:59 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:59 | D | + scale_dtype: (None,) +25-08-28 16:32:59 | D | - Quantizing transformer_blocks.2.attn.to_k.weight +25-08-28 16:32:59 | D | + quant_dtype: sint4 +25-08-28 16:32:59 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:59 | D | + scale_dtype: (None,) +25-08-28 16:32:59 | D | - Quantizing transformer_blocks.2.attn.to_v.weight +25-08-28 16:32:59 | D | + quant_dtype: sint4 +25-08-28 16:32:59 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:59 | D | + scale_dtype: (None,) +25-08-28 16:32:59 | D | - Quantizing transformer_blocks.2.attn.add_q_proj.weight +25-08-28 16:32:59 | D | + quant_dtype: sint4 +25-08-28 16:32:59 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:59 | D | + scale_dtype: (None,) +25-08-28 16:32:59 | D | - Quantizing transformer_blocks.2.attn.add_k_proj.weight +25-08-28 16:32:59 | D | + quant_dtype: sint4 +25-08-28 16:32:59 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:32:59 | D | + scale_dtype: (None,) +25-08-28 16:33:00 | D | - Quantizing transformer_blocks.2.attn.add_v_proj.weight +25-08-28 16:33:00 | D | + quant_dtype: sint4 +25-08-28 16:33:00 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:00 | D | + scale_dtype: (None,) +25-08-28 16:33:00 | D | - Quantizing transformer_blocks.2.attn.to_out.0.weight +25-08-28 16:33:00 | D | + quant_dtype: sint4 +25-08-28 16:33:00 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:00 | D | + scale_dtype: (None,) +25-08-28 16:33:00 | D | - Quantizing transformer_blocks.2.attn.to_add_out.weight +25-08-28 16:33:00 | D | + quant_dtype: sint4 +25-08-28 16:33:00 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:00 | D | + scale_dtype: (None,) +25-08-28 16:33:00 | D | - Quantizing transformer_blocks.2.ff.net.0.proj.weight +25-08-28 16:33:00 | D | + quant_dtype: sint4 +25-08-28 16:33:00 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:00 | D | + scale_dtype: (None,) +25-08-28 16:33:00 | D | - Quantizing transformer_blocks.2.ff.net.2.linear.weight +25-08-28 16:33:00 | D | + quant_dtype: sint4 +25-08-28 16:33:00 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:00 | D | + scale_dtype: (None,) +25-08-28 16:33:01 | D | - Quantizing transformer_blocks.2.ff_context.net.0.proj.weight +25-08-28 16:33:01 | D | + quant_dtype: sint4 +25-08-28 16:33:01 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:01 | D | + scale_dtype: (None,) +25-08-28 16:33:01 | D | - Quantizing transformer_blocks.2.ff_context.net.2.linear.weight +25-08-28 16:33:01 | D | + quant_dtype: sint4 +25-08-28 16:33:01 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:01 | D | + scale_dtype: (None,) +25-08-28 16:33:01 | D | - Quantizing weights: block transformer_blocks.3 +25-08-28 16:33:01 | D | - Quantizing transformer_blocks.3.norm1.linear.weight +25-08-28 16:33:01 | D | + quant_dtype: sint4 +25-08-28 16:33:01 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:01 | D | + scale_dtype: (None,) +25-08-28 16:33:01 | D | - Quantizing transformer_blocks.3.norm1_context.linear.weight +25-08-28 16:33:01 | D | + quant_dtype: sint4 +25-08-28 16:33:01 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:01 | D | + scale_dtype: (None,) +25-08-28 16:33:01 | D | - Quantizing transformer_blocks.3.attn.to_q.weight +25-08-28 16:33:01 | D | + quant_dtype: sint4 +25-08-28 16:33:01 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:01 | D | + scale_dtype: (None,) +25-08-28 16:33:02 | D | - Quantizing transformer_blocks.3.attn.to_k.weight +25-08-28 16:33:02 | D | + quant_dtype: sint4 +25-08-28 16:33:02 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:02 | D | + scale_dtype: (None,) +25-08-28 16:33:02 | D | - Quantizing transformer_blocks.3.attn.to_v.weight +25-08-28 16:33:02 | D | + quant_dtype: sint4 +25-08-28 16:33:02 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:02 | D | + scale_dtype: (None,) +25-08-28 16:33:02 | D | - Quantizing transformer_blocks.3.attn.add_q_proj.weight +25-08-28 16:33:02 | D | + quant_dtype: sint4 +25-08-28 16:33:02 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:02 | D | + scale_dtype: (None,) +25-08-28 16:33:02 | D | - Quantizing transformer_blocks.3.attn.add_k_proj.weight +25-08-28 16:33:02 | D | + quant_dtype: sint4 +25-08-28 16:33:02 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:02 | D | + scale_dtype: (None,) +25-08-28 16:33:02 | D | - Quantizing transformer_blocks.3.attn.add_v_proj.weight +25-08-28 16:33:02 | D | + quant_dtype: sint4 +25-08-28 16:33:02 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:02 | D | + scale_dtype: (None,) +25-08-28 16:33:03 | D | - Quantizing transformer_blocks.3.attn.to_out.0.weight +25-08-28 16:33:03 | D | + quant_dtype: sint4 +25-08-28 16:33:03 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:03 | D | + scale_dtype: (None,) +25-08-28 16:33:03 | D | - Quantizing transformer_blocks.3.attn.to_add_out.weight +25-08-28 16:33:03 | D | + quant_dtype: sint4 +25-08-28 16:33:03 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:03 | D | + scale_dtype: (None,) +25-08-28 16:33:03 | D | - Quantizing transformer_blocks.3.ff.net.0.proj.weight +25-08-28 16:33:03 | D | + quant_dtype: sint4 +25-08-28 16:33:03 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:03 | D | + scale_dtype: (None,) +25-08-28 16:33:03 | D | - Quantizing transformer_blocks.3.ff.net.2.linear.weight +25-08-28 16:33:03 | D | + quant_dtype: sint4 +25-08-28 16:33:03 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:03 | D | + scale_dtype: (None,) +25-08-28 16:33:03 | D | - Quantizing transformer_blocks.3.ff_context.net.0.proj.weight +25-08-28 16:33:03 | D | + quant_dtype: sint4 +25-08-28 16:33:03 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:03 | D | + scale_dtype: (None,) +25-08-28 16:33:03 | D | - Quantizing transformer_blocks.3.ff_context.net.2.linear.weight +25-08-28 16:33:03 | D | + quant_dtype: sint4 +25-08-28 16:33:03 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:03 | D | + scale_dtype: (None,) +25-08-28 16:33:04 | D | - Quantizing weights: block transformer_blocks.4 +25-08-28 16:33:04 | D | - Quantizing transformer_blocks.4.norm1.linear.weight +25-08-28 16:33:04 | D | + quant_dtype: sint4 +25-08-28 16:33:04 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:04 | D | + scale_dtype: (None,) +25-08-28 16:33:04 | D | - Quantizing transformer_blocks.4.norm1_context.linear.weight +25-08-28 16:33:04 | D | + quant_dtype: sint4 +25-08-28 16:33:04 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:04 | D | + scale_dtype: (None,) +25-08-28 16:33:04 | D | - Quantizing transformer_blocks.4.attn.to_q.weight +25-08-28 16:33:04 | D | + quant_dtype: sint4 +25-08-28 16:33:04 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:04 | D | + scale_dtype: (None,) +25-08-28 16:33:04 | D | - Quantizing transformer_blocks.4.attn.to_k.weight +25-08-28 16:33:04 | D | + quant_dtype: sint4 +25-08-28 16:33:04 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:04 | D | + scale_dtype: (None,) +25-08-28 16:33:04 | D | - Quantizing transformer_blocks.4.attn.to_v.weight +25-08-28 16:33:04 | D | + quant_dtype: sint4 +25-08-28 16:33:04 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:04 | D | + scale_dtype: (None,) +25-08-28 16:33:05 | D | - Quantizing transformer_blocks.4.attn.add_q_proj.weight +25-08-28 16:33:05 | D | + quant_dtype: sint4 +25-08-28 16:33:05 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:05 | D | + scale_dtype: (None,) +25-08-28 16:33:05 | D | - Quantizing transformer_blocks.4.attn.add_k_proj.weight +25-08-28 16:33:05 | D | + quant_dtype: sint4 +25-08-28 16:33:05 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:05 | D | + scale_dtype: (None,) +25-08-28 16:33:05 | D | - Quantizing transformer_blocks.4.attn.add_v_proj.weight +25-08-28 16:33:05 | D | + quant_dtype: sint4 +25-08-28 16:33:05 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:05 | D | + scale_dtype: (None,) +25-08-28 16:33:05 | D | - Quantizing transformer_blocks.4.attn.to_out.0.weight +25-08-28 16:33:05 | D | + quant_dtype: sint4 +25-08-28 16:33:05 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:05 | D | + scale_dtype: (None,) +25-08-28 16:33:05 | D | - Quantizing transformer_blocks.4.attn.to_add_out.weight +25-08-28 16:33:05 | D | + quant_dtype: sint4 +25-08-28 16:33:05 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:05 | D | + scale_dtype: (None,) +25-08-28 16:33:06 | D | - Quantizing transformer_blocks.4.ff.net.0.proj.weight +25-08-28 16:33:06 | D | + quant_dtype: sint4 +25-08-28 16:33:06 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:06 | D | + scale_dtype: (None,) +25-08-28 16:33:06 | D | - Quantizing transformer_blocks.4.ff.net.2.linear.weight +25-08-28 16:33:06 | D | + quant_dtype: sint4 +25-08-28 16:33:06 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:06 | D | + scale_dtype: (None,) +25-08-28 16:33:06 | D | - Quantizing transformer_blocks.4.ff_context.net.0.proj.weight +25-08-28 16:33:06 | D | + quant_dtype: sint4 +25-08-28 16:33:06 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:06 | D | + scale_dtype: (None,) +25-08-28 16:33:06 | D | - Quantizing transformer_blocks.4.ff_context.net.2.linear.weight +25-08-28 16:33:06 | D | + quant_dtype: sint4 +25-08-28 16:33:06 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:06 | D | + scale_dtype: (None,) +25-08-28 16:33:06 | D | - Quantizing weights: block transformer_blocks.5 +25-08-28 16:33:06 | D | - Quantizing transformer_blocks.5.norm1.linear.weight +25-08-28 16:33:06 | D | + quant_dtype: sint4 +25-08-28 16:33:06 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:06 | D | + scale_dtype: (None,) +25-08-28 16:33:07 | D | - Quantizing transformer_blocks.5.norm1_context.linear.weight +25-08-28 16:33:07 | D | + quant_dtype: sint4 +25-08-28 16:33:07 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:07 | D | + scale_dtype: (None,) +25-08-28 16:33:07 | D | - Quantizing transformer_blocks.5.attn.to_q.weight +25-08-28 16:33:07 | D | + quant_dtype: sint4 +25-08-28 16:33:07 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:07 | D | + scale_dtype: (None,) +25-08-28 16:33:07 | D | - Quantizing transformer_blocks.5.attn.to_k.weight +25-08-28 16:33:07 | D | + quant_dtype: sint4 +25-08-28 16:33:07 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:07 | D | + scale_dtype: (None,) +25-08-28 16:33:07 | D | - Quantizing transformer_blocks.5.attn.to_v.weight +25-08-28 16:33:07 | D | + quant_dtype: sint4 +25-08-28 16:33:07 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:07 | D | + scale_dtype: (None,) +25-08-28 16:33:07 | D | - Quantizing transformer_blocks.5.attn.add_q_proj.weight +25-08-28 16:33:07 | D | + quant_dtype: sint4 +25-08-28 16:33:07 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:07 | D | + scale_dtype: (None,) +25-08-28 16:33:08 | D | - Quantizing transformer_blocks.5.attn.add_k_proj.weight +25-08-28 16:33:08 | D | + quant_dtype: sint4 +25-08-28 16:33:08 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:08 | D | + scale_dtype: (None,) +25-08-28 16:33:08 | D | - Quantizing transformer_blocks.5.attn.add_v_proj.weight +25-08-28 16:33:08 | D | + quant_dtype: sint4 +25-08-28 16:33:08 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:08 | D | + scale_dtype: (None,) +25-08-28 16:33:08 | D | - Quantizing transformer_blocks.5.attn.to_out.0.weight +25-08-28 16:33:08 | D | + quant_dtype: sint4 +25-08-28 16:33:08 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:08 | D | + scale_dtype: (None,) +25-08-28 16:33:08 | D | - Quantizing transformer_blocks.5.attn.to_add_out.weight +25-08-28 16:33:08 | D | + quant_dtype: sint4 +25-08-28 16:33:08 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:08 | D | + scale_dtype: (None,) +25-08-28 16:33:08 | D | - Quantizing transformer_blocks.5.ff.net.0.proj.weight +25-08-28 16:33:08 | D | + quant_dtype: sint4 +25-08-28 16:33:08 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:08 | D | + scale_dtype: (None,) +25-08-28 16:33:09 | D | - Quantizing transformer_blocks.5.ff.net.2.linear.weight +25-08-28 16:33:09 | D | + quant_dtype: sint4 +25-08-28 16:33:09 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:09 | D | + scale_dtype: (None,) +25-08-28 16:33:09 | D | - Quantizing transformer_blocks.5.ff_context.net.0.proj.weight +25-08-28 16:33:09 | D | + quant_dtype: sint4 +25-08-28 16:33:09 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:09 | D | + scale_dtype: (None,) +25-08-28 16:33:09 | D | - Quantizing transformer_blocks.5.ff_context.net.2.linear.weight +25-08-28 16:33:09 | D | + quant_dtype: sint4 +25-08-28 16:33:09 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:09 | D | + scale_dtype: (None,) +25-08-28 16:33:09 | D | - Quantizing weights: block transformer_blocks.6 +25-08-28 16:33:09 | D | - Quantizing transformer_blocks.6.norm1.linear.weight +25-08-28 16:33:09 | D | + quant_dtype: sint4 +25-08-28 16:33:09 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:09 | D | + scale_dtype: (None,) +25-08-28 16:33:09 | D | - Quantizing transformer_blocks.6.norm1_context.linear.weight +25-08-28 16:33:09 | D | + quant_dtype: sint4 +25-08-28 16:33:09 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:09 | D | + scale_dtype: (None,) +25-08-28 16:33:09 | D | - Quantizing transformer_blocks.6.attn.to_q.weight +25-08-28 16:33:09 | D | + quant_dtype: sint4 +25-08-28 16:33:09 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:09 | D | + scale_dtype: (None,) +25-08-28 16:33:10 | D | - Quantizing transformer_blocks.6.attn.to_k.weight +25-08-28 16:33:10 | D | + quant_dtype: sint4 +25-08-28 16:33:10 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:10 | D | + scale_dtype: (None,) +25-08-28 16:33:10 | D | - Quantizing transformer_blocks.6.attn.to_v.weight +25-08-28 16:33:10 | D | + quant_dtype: sint4 +25-08-28 16:33:10 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:10 | D | + scale_dtype: (None,) +25-08-28 16:33:10 | D | - Quantizing transformer_blocks.6.attn.add_q_proj.weight +25-08-28 16:33:10 | D | + quant_dtype: sint4 +25-08-28 16:33:10 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:10 | D | + scale_dtype: (None,) +25-08-28 16:33:10 | D | - Quantizing transformer_blocks.6.attn.add_k_proj.weight +25-08-28 16:33:10 | D | + quant_dtype: sint4 +25-08-28 16:33:10 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:10 | D | + scale_dtype: (None,) +25-08-28 16:33:10 | D | - Quantizing transformer_blocks.6.attn.add_v_proj.weight +25-08-28 16:33:10 | D | + quant_dtype: sint4 +25-08-28 16:33:10 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:10 | D | + scale_dtype: (None,) +25-08-28 16:33:11 | D | - Quantizing transformer_blocks.6.attn.to_out.0.weight +25-08-28 16:33:11 | D | + quant_dtype: sint4 +25-08-28 16:33:11 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:11 | D | + scale_dtype: (None,) +25-08-28 16:33:11 | D | - Quantizing transformer_blocks.6.attn.to_add_out.weight +25-08-28 16:33:11 | D | + quant_dtype: sint4 +25-08-28 16:33:11 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:11 | D | + scale_dtype: (None,) +25-08-28 16:33:11 | D | - Quantizing transformer_blocks.6.ff.net.0.proj.weight +25-08-28 16:33:11 | D | + quant_dtype: sint4 +25-08-28 16:33:11 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:11 | D | + scale_dtype: (None,) +25-08-28 16:33:11 | D | - Quantizing transformer_blocks.6.ff.net.2.linear.weight +25-08-28 16:33:11 | D | + quant_dtype: sint4 +25-08-28 16:33:11 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:11 | D | + scale_dtype: (None,) +25-08-28 16:33:11 | D | - Quantizing transformer_blocks.6.ff_context.net.0.proj.weight +25-08-28 16:33:11 | D | + quant_dtype: sint4 +25-08-28 16:33:11 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:11 | D | + scale_dtype: (None,) +25-08-28 16:33:12 | D | - Quantizing transformer_blocks.6.ff_context.net.2.linear.weight +25-08-28 16:33:12 | D | + quant_dtype: sint4 +25-08-28 16:33:12 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:12 | D | + scale_dtype: (None,) +25-08-28 16:33:12 | D | - Quantizing weights: block transformer_blocks.7 +25-08-28 16:33:12 | D | - Quantizing transformer_blocks.7.norm1.linear.weight +25-08-28 16:33:12 | D | + quant_dtype: sint4 +25-08-28 16:33:12 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:12 | D | + scale_dtype: (None,) +25-08-28 16:33:12 | D | - Quantizing transformer_blocks.7.norm1_context.linear.weight +25-08-28 16:33:12 | D | + quant_dtype: sint4 +25-08-28 16:33:12 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:12 | D | + scale_dtype: (None,) +25-08-28 16:33:12 | D | - Quantizing transformer_blocks.7.attn.to_q.weight +25-08-28 16:33:12 | D | + quant_dtype: sint4 +25-08-28 16:33:12 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:12 | D | + scale_dtype: (None,) +25-08-28 16:33:12 | D | - Quantizing transformer_blocks.7.attn.to_k.weight +25-08-28 16:33:12 | D | + quant_dtype: sint4 +25-08-28 16:33:12 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:12 | D | + scale_dtype: (None,) +25-08-28 16:33:13 | D | - Quantizing transformer_blocks.7.attn.to_v.weight +25-08-28 16:33:13 | D | + quant_dtype: sint4 +25-08-28 16:33:13 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:13 | D | + scale_dtype: (None,) +25-08-28 16:33:13 | D | - Quantizing transformer_blocks.7.attn.add_q_proj.weight +25-08-28 16:33:13 | D | + quant_dtype: sint4 +25-08-28 16:33:13 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:13 | D | + scale_dtype: (None,) +25-08-28 16:33:13 | D | - Quantizing transformer_blocks.7.attn.add_k_proj.weight +25-08-28 16:33:13 | D | + quant_dtype: sint4 +25-08-28 16:33:13 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:13 | D | + scale_dtype: (None,) +25-08-28 16:33:13 | D | - Quantizing transformer_blocks.7.attn.add_v_proj.weight +25-08-28 16:33:13 | D | + quant_dtype: sint4 +25-08-28 16:33:13 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:13 | D | + scale_dtype: (None,) +25-08-28 16:33:13 | D | - Quantizing transformer_blocks.7.attn.to_out.0.weight +25-08-28 16:33:13 | D | + quant_dtype: sint4 +25-08-28 16:33:13 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:13 | D | + scale_dtype: (None,) +25-08-28 16:33:13 | D | - Quantizing transformer_blocks.7.attn.to_add_out.weight +25-08-28 16:33:13 | D | + quant_dtype: sint4 +25-08-28 16:33:13 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:13 | D | + scale_dtype: (None,) +25-08-28 16:33:14 | D | - Quantizing transformer_blocks.7.ff.net.0.proj.weight +25-08-28 16:33:14 | D | + quant_dtype: sint4 +25-08-28 16:33:14 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:14 | D | + scale_dtype: (None,) +25-08-28 16:33:14 | D | - Quantizing transformer_blocks.7.ff.net.2.linear.weight +25-08-28 16:33:14 | D | + quant_dtype: sint4 +25-08-28 16:33:14 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:14 | D | + scale_dtype: (None,) +25-08-28 16:33:14 | D | - Quantizing transformer_blocks.7.ff_context.net.0.proj.weight +25-08-28 16:33:14 | D | + quant_dtype: sint4 +25-08-28 16:33:14 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:14 | D | + scale_dtype: (None,) +25-08-28 16:33:14 | D | - Quantizing transformer_blocks.7.ff_context.net.2.linear.weight +25-08-28 16:33:14 | D | + quant_dtype: sint4 +25-08-28 16:33:14 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:14 | D | + scale_dtype: (None,) +25-08-28 16:33:14 | D | - Quantizing weights: block transformer_blocks.8 +25-08-28 16:33:14 | D | - Quantizing transformer_blocks.8.norm1.linear.weight +25-08-28 16:33:14 | D | + quant_dtype: sint4 +25-08-28 16:33:14 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:14 | D | + scale_dtype: (None,) +25-08-28 16:33:15 | D | - Quantizing transformer_blocks.8.norm1_context.linear.weight +25-08-28 16:33:15 | D | + quant_dtype: sint4 +25-08-28 16:33:15 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:15 | D | + scale_dtype: (None,) +25-08-28 16:33:15 | D | - Quantizing transformer_blocks.8.attn.to_q.weight +25-08-28 16:33:15 | D | + quant_dtype: sint4 +25-08-28 16:33:15 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:15 | D | + scale_dtype: (None,) +25-08-28 16:33:15 | D | - Quantizing transformer_blocks.8.attn.to_k.weight +25-08-28 16:33:15 | D | + quant_dtype: sint4 +25-08-28 16:33:15 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:15 | D | + scale_dtype: (None,) +25-08-28 16:33:15 | D | - Quantizing transformer_blocks.8.attn.to_v.weight +25-08-28 16:33:15 | D | + quant_dtype: sint4 +25-08-28 16:33:15 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:15 | D | + scale_dtype: (None,) +25-08-28 16:33:15 | D | - Quantizing transformer_blocks.8.attn.add_q_proj.weight +25-08-28 16:33:15 | D | + quant_dtype: sint4 +25-08-28 16:33:15 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:15 | D | + scale_dtype: (None,) +25-08-28 16:33:16 | D | - Quantizing transformer_blocks.8.attn.add_k_proj.weight +25-08-28 16:33:16 | D | + quant_dtype: sint4 +25-08-28 16:33:16 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:16 | D | + scale_dtype: (None,) +25-08-28 16:33:16 | D | - Quantizing transformer_blocks.8.attn.add_v_proj.weight +25-08-28 16:33:16 | D | + quant_dtype: sint4 +25-08-28 16:33:16 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:16 | D | + scale_dtype: (None,) +25-08-28 16:33:16 | D | - Quantizing transformer_blocks.8.attn.to_out.0.weight +25-08-28 16:33:16 | D | + quant_dtype: sint4 +25-08-28 16:33:16 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:16 | D | + scale_dtype: (None,) +25-08-28 16:33:16 | D | - Quantizing transformer_blocks.8.attn.to_add_out.weight +25-08-28 16:33:16 | D | + quant_dtype: sint4 +25-08-28 16:33:16 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:16 | D | + scale_dtype: (None,) +25-08-28 16:33:16 | D | - Quantizing transformer_blocks.8.ff.net.0.proj.weight +25-08-28 16:33:16 | D | + quant_dtype: sint4 +25-08-28 16:33:16 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:16 | D | + scale_dtype: (None,) +25-08-28 16:33:17 | D | - Quantizing transformer_blocks.8.ff.net.2.linear.weight +25-08-28 16:33:17 | D | + quant_dtype: sint4 +25-08-28 16:33:17 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:17 | D | + scale_dtype: (None,) +25-08-28 16:33:17 | D | - Quantizing transformer_blocks.8.ff_context.net.0.proj.weight +25-08-28 16:33:17 | D | + quant_dtype: sint4 +25-08-28 16:33:17 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:17 | D | + scale_dtype: (None,) +25-08-28 16:33:17 | D | - Quantizing transformer_blocks.8.ff_context.net.2.linear.weight +25-08-28 16:33:17 | D | + quant_dtype: sint4 +25-08-28 16:33:17 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:17 | D | + scale_dtype: (None,) +25-08-28 16:33:17 | D | - Quantizing weights: block transformer_blocks.9 +25-08-28 16:33:17 | D | - Quantizing transformer_blocks.9.norm1.linear.weight +25-08-28 16:33:17 | D | + quant_dtype: sint4 +25-08-28 16:33:17 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:17 | D | + scale_dtype: (None,) +25-08-28 16:33:17 | D | - Quantizing transformer_blocks.9.norm1_context.linear.weight +25-08-28 16:33:17 | D | + quant_dtype: sint4 +25-08-28 16:33:17 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:17 | D | + scale_dtype: (None,) +25-08-28 16:33:17 | D | - Quantizing transformer_blocks.9.attn.to_q.weight +25-08-28 16:33:17 | D | + quant_dtype: sint4 +25-08-28 16:33:17 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:17 | D | + scale_dtype: (None,) +25-08-28 16:33:18 | D | - Quantizing transformer_blocks.9.attn.to_k.weight +25-08-28 16:33:18 | D | + quant_dtype: sint4 +25-08-28 16:33:18 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:18 | D | + scale_dtype: (None,) +25-08-28 16:33:18 | D | - Quantizing transformer_blocks.9.attn.to_v.weight +25-08-28 16:33:18 | D | + quant_dtype: sint4 +25-08-28 16:33:18 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:18 | D | + scale_dtype: (None,) +25-08-28 16:33:18 | D | - Quantizing transformer_blocks.9.attn.add_q_proj.weight +25-08-28 16:33:18 | D | + quant_dtype: sint4 +25-08-28 16:33:18 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:18 | D | + scale_dtype: (None,) +25-08-28 16:33:18 | D | - Quantizing transformer_blocks.9.attn.add_k_proj.weight +25-08-28 16:33:18 | D | + quant_dtype: sint4 +25-08-28 16:33:18 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:18 | D | + scale_dtype: (None,) +25-08-28 16:33:18 | D | - Quantizing transformer_blocks.9.attn.add_v_proj.weight +25-08-28 16:33:18 | D | + quant_dtype: sint4 +25-08-28 16:33:18 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:18 | D | + scale_dtype: (None,) +25-08-28 16:33:19 | D | - Quantizing transformer_blocks.9.attn.to_out.0.weight +25-08-28 16:33:19 | D | + quant_dtype: sint4 +25-08-28 16:33:19 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:19 | D | + scale_dtype: (None,) +25-08-28 16:33:19 | D | - Quantizing transformer_blocks.9.attn.to_add_out.weight +25-08-28 16:33:19 | D | + quant_dtype: sint4 +25-08-28 16:33:19 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:19 | D | + scale_dtype: (None,) +25-08-28 16:33:19 | D | - Quantizing transformer_blocks.9.ff.net.0.proj.weight +25-08-28 16:33:19 | D | + quant_dtype: sint4 +25-08-28 16:33:19 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:19 | D | + scale_dtype: (None,) +25-08-28 16:33:19 | D | - Quantizing transformer_blocks.9.ff.net.2.linear.weight +25-08-28 16:33:19 | D | + quant_dtype: sint4 +25-08-28 16:33:19 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:19 | D | + scale_dtype: (None,) +25-08-28 16:33:19 | D | - Quantizing transformer_blocks.9.ff_context.net.0.proj.weight +25-08-28 16:33:19 | D | + quant_dtype: sint4 +25-08-28 16:33:19 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:19 | D | + scale_dtype: (None,) +25-08-28 16:33:20 | D | - Quantizing transformer_blocks.9.ff_context.net.2.linear.weight +25-08-28 16:33:20 | D | + quant_dtype: sint4 +25-08-28 16:33:20 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:20 | D | + scale_dtype: (None,) +25-08-28 16:33:20 | D | - Quantizing weights: block transformer_blocks.10 +25-08-28 16:33:20 | D | - Quantizing transformer_blocks.10.norm1.linear.weight +25-08-28 16:33:20 | D | + quant_dtype: sint4 +25-08-28 16:33:20 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:20 | D | + scale_dtype: (None,) +25-08-28 16:33:20 | D | - Quantizing transformer_blocks.10.norm1_context.linear.weight +25-08-28 16:33:20 | D | + quant_dtype: sint4 +25-08-28 16:33:20 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:20 | D | + scale_dtype: (None,) +25-08-28 16:33:20 | D | - Quantizing transformer_blocks.10.attn.to_q.weight +25-08-28 16:33:20 | D | + quant_dtype: sint4 +25-08-28 16:33:20 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:20 | D | + scale_dtype: (None,) +25-08-28 16:33:20 | D | - Quantizing transformer_blocks.10.attn.to_k.weight +25-08-28 16:33:20 | D | + quant_dtype: sint4 +25-08-28 16:33:20 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:20 | D | + scale_dtype: (None,) +25-08-28 16:33:21 | D | - Quantizing transformer_blocks.10.attn.to_v.weight +25-08-28 16:33:21 | D | + quant_dtype: sint4 +25-08-28 16:33:21 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:21 | D | + scale_dtype: (None,) +25-08-28 16:33:21 | D | - Quantizing transformer_blocks.10.attn.add_q_proj.weight +25-08-28 16:33:21 | D | + quant_dtype: sint4 +25-08-28 16:33:21 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:21 | D | + scale_dtype: (None,) +25-08-28 16:33:21 | D | - Quantizing transformer_blocks.10.attn.add_k_proj.weight +25-08-28 16:33:21 | D | + quant_dtype: sint4 +25-08-28 16:33:21 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:21 | D | + scale_dtype: (None,) +25-08-28 16:33:21 | D | - Quantizing transformer_blocks.10.attn.add_v_proj.weight +25-08-28 16:33:21 | D | + quant_dtype: sint4 +25-08-28 16:33:21 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:21 | D | + scale_dtype: (None,) +25-08-28 16:33:21 | D | - Quantizing transformer_blocks.10.attn.to_out.0.weight +25-08-28 16:33:21 | D | + quant_dtype: sint4 +25-08-28 16:33:21 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:21 | D | + scale_dtype: (None,) +25-08-28 16:33:22 | D | - Quantizing transformer_blocks.10.attn.to_add_out.weight +25-08-28 16:33:22 | D | + quant_dtype: sint4 +25-08-28 16:33:22 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:22 | D | + scale_dtype: (None,) +25-08-28 16:33:22 | D | - Quantizing transformer_blocks.10.ff.net.0.proj.weight +25-08-28 16:33:22 | D | + quant_dtype: sint4 +25-08-28 16:33:22 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:22 | D | + scale_dtype: (None,) +25-08-28 16:33:22 | D | - Quantizing transformer_blocks.10.ff.net.2.linear.weight +25-08-28 16:33:22 | D | + quant_dtype: sint4 +25-08-28 16:33:22 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:22 | D | + scale_dtype: (None,) +25-08-28 16:33:22 | D | - Quantizing transformer_blocks.10.ff_context.net.0.proj.weight +25-08-28 16:33:22 | D | + quant_dtype: sint4 +25-08-28 16:33:22 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:22 | D | + scale_dtype: (None,) +25-08-28 16:33:22 | D | - Quantizing transformer_blocks.10.ff_context.net.2.linear.weight +25-08-28 16:33:22 | D | + quant_dtype: sint4 +25-08-28 16:33:22 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:22 | D | + scale_dtype: (None,) +25-08-28 16:33:22 | D | - Quantizing weights: block transformer_blocks.11 +25-08-28 16:33:22 | D | - Quantizing transformer_blocks.11.norm1.linear.weight +25-08-28 16:33:22 | D | + quant_dtype: sint4 +25-08-28 16:33:22 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:22 | D | + scale_dtype: (None,) +25-08-28 16:33:23 | D | - Quantizing transformer_blocks.11.norm1_context.linear.weight +25-08-28 16:33:23 | D | + quant_dtype: sint4 +25-08-28 16:33:23 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:23 | D | + scale_dtype: (None,) +25-08-28 16:33:23 | D | - Quantizing transformer_blocks.11.attn.to_q.weight +25-08-28 16:33:23 | D | + quant_dtype: sint4 +25-08-28 16:33:23 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:23 | D | + scale_dtype: (None,) +25-08-28 16:33:23 | D | - Quantizing transformer_blocks.11.attn.to_k.weight +25-08-28 16:33:23 | D | + quant_dtype: sint4 +25-08-28 16:33:23 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:23 | D | + scale_dtype: (None,) +25-08-28 16:33:23 | D | - Quantizing transformer_blocks.11.attn.to_v.weight +25-08-28 16:33:23 | D | + quant_dtype: sint4 +25-08-28 16:33:23 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:23 | D | + scale_dtype: (None,) +25-08-28 16:33:23 | D | - Quantizing transformer_blocks.11.attn.add_q_proj.weight +25-08-28 16:33:23 | D | + quant_dtype: sint4 +25-08-28 16:33:23 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:23 | D | + scale_dtype: (None,) +25-08-28 16:33:24 | D | - Quantizing transformer_blocks.11.attn.add_k_proj.weight +25-08-28 16:33:24 | D | + quant_dtype: sint4 +25-08-28 16:33:24 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:24 | D | + scale_dtype: (None,) +25-08-28 16:33:24 | D | - Quantizing transformer_blocks.11.attn.add_v_proj.weight +25-08-28 16:33:24 | D | + quant_dtype: sint4 +25-08-28 16:33:24 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:24 | D | + scale_dtype: (None,) +25-08-28 16:33:24 | D | - Quantizing transformer_blocks.11.attn.to_out.0.weight +25-08-28 16:33:24 | D | + quant_dtype: sint4 +25-08-28 16:33:24 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:24 | D | + scale_dtype: (None,) +25-08-28 16:33:24 | D | - Quantizing transformer_blocks.11.attn.to_add_out.weight +25-08-28 16:33:24 | D | + quant_dtype: sint4 +25-08-28 16:33:24 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:24 | D | + scale_dtype: (None,) +25-08-28 16:33:24 | D | - Quantizing transformer_blocks.11.ff.net.0.proj.weight +25-08-28 16:33:24 | D | + quant_dtype: sint4 +25-08-28 16:33:24 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:24 | D | + scale_dtype: (None,) +25-08-28 16:33:25 | D | - Quantizing transformer_blocks.11.ff.net.2.linear.weight +25-08-28 16:33:25 | D | + quant_dtype: sint4 +25-08-28 16:33:25 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:25 | D | + scale_dtype: (None,) +25-08-28 16:33:25 | D | - Quantizing transformer_blocks.11.ff_context.net.0.proj.weight +25-08-28 16:33:25 | D | + quant_dtype: sint4 +25-08-28 16:33:25 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:25 | D | + scale_dtype: (None,) +25-08-28 16:33:25 | D | - Quantizing transformer_blocks.11.ff_context.net.2.linear.weight +25-08-28 16:33:25 | D | + quant_dtype: sint4 +25-08-28 16:33:25 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:25 | D | + scale_dtype: (None,) +25-08-28 16:33:25 | D | - Quantizing weights: block transformer_blocks.12 +25-08-28 16:33:25 | D | - Quantizing transformer_blocks.12.norm1.linear.weight +25-08-28 16:33:25 | D | + quant_dtype: sint4 +25-08-28 16:33:25 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:25 | D | + scale_dtype: (None,) +25-08-28 16:33:25 | D | - Quantizing transformer_blocks.12.norm1_context.linear.weight +25-08-28 16:33:25 | D | + quant_dtype: sint4 +25-08-28 16:33:25 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:25 | D | + scale_dtype: (None,) +25-08-28 16:33:26 | D | - Quantizing transformer_blocks.12.attn.to_q.weight +25-08-28 16:33:26 | D | + quant_dtype: sint4 +25-08-28 16:33:26 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:26 | D | + scale_dtype: (None,) +25-08-28 16:33:26 | D | - Quantizing transformer_blocks.12.attn.to_k.weight +25-08-28 16:33:26 | D | + quant_dtype: sint4 +25-08-28 16:33:26 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:26 | D | + scale_dtype: (None,) +25-08-28 16:33:26 | D | - Quantizing transformer_blocks.12.attn.to_v.weight +25-08-28 16:33:26 | D | + quant_dtype: sint4 +25-08-28 16:33:26 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:26 | D | + scale_dtype: (None,) +25-08-28 16:33:26 | D | - Quantizing transformer_blocks.12.attn.add_q_proj.weight +25-08-28 16:33:26 | D | + quant_dtype: sint4 +25-08-28 16:33:26 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:26 | D | + scale_dtype: (None,) +25-08-28 16:33:26 | D | - Quantizing transformer_blocks.12.attn.add_k_proj.weight +25-08-28 16:33:26 | D | + quant_dtype: sint4 +25-08-28 16:33:26 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:26 | D | + scale_dtype: (None,) +25-08-28 16:33:27 | D | - Quantizing transformer_blocks.12.attn.add_v_proj.weight +25-08-28 16:33:27 | D | + quant_dtype: sint4 +25-08-28 16:33:27 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:27 | D | + scale_dtype: (None,) +25-08-28 16:33:27 | D | - Quantizing transformer_blocks.12.attn.to_out.0.weight +25-08-28 16:33:27 | D | + quant_dtype: sint4 +25-08-28 16:33:27 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:27 | D | + scale_dtype: (None,) +25-08-28 16:33:27 | D | - Quantizing transformer_blocks.12.attn.to_add_out.weight +25-08-28 16:33:27 | D | + quant_dtype: sint4 +25-08-28 16:33:27 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:27 | D | + scale_dtype: (None,) +25-08-28 16:33:27 | D | - Quantizing transformer_blocks.12.ff.net.0.proj.weight +25-08-28 16:33:27 | D | + quant_dtype: sint4 +25-08-28 16:33:27 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:27 | D | + scale_dtype: (None,) +25-08-28 16:33:27 | D | - Quantizing transformer_blocks.12.ff.net.2.linear.weight +25-08-28 16:33:27 | D | + quant_dtype: sint4 +25-08-28 16:33:27 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:27 | D | + scale_dtype: (None,) +25-08-28 16:33:28 | D | - Quantizing transformer_blocks.12.ff_context.net.0.proj.weight +25-08-28 16:33:28 | D | + quant_dtype: sint4 +25-08-28 16:33:28 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:28 | D | + scale_dtype: (None,) +25-08-28 16:33:28 | D | - Quantizing transformer_blocks.12.ff_context.net.2.linear.weight +25-08-28 16:33:28 | D | + quant_dtype: sint4 +25-08-28 16:33:28 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:28 | D | + scale_dtype: (None,) +25-08-28 16:33:28 | D | - Quantizing weights: block transformer_blocks.13 +25-08-28 16:33:28 | D | - Quantizing transformer_blocks.13.norm1.linear.weight +25-08-28 16:33:28 | D | + quant_dtype: sint4 +25-08-28 16:33:28 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:28 | D | + scale_dtype: (None,) +25-08-28 16:33:28 | D | - Quantizing transformer_blocks.13.norm1_context.linear.weight +25-08-28 16:33:28 | D | + quant_dtype: sint4 +25-08-28 16:33:28 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:28 | D | + scale_dtype: (None,) +25-08-28 16:33:28 | D | - Quantizing transformer_blocks.13.attn.to_q.weight +25-08-28 16:33:28 | D | + quant_dtype: sint4 +25-08-28 16:33:28 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:28 | D | + scale_dtype: (None,) +25-08-28 16:33:29 | D | - Quantizing transformer_blocks.13.attn.to_k.weight +25-08-28 16:33:29 | D | + quant_dtype: sint4 +25-08-28 16:33:29 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:29 | D | + scale_dtype: (None,) +25-08-28 16:33:29 | D | - Quantizing transformer_blocks.13.attn.to_v.weight +25-08-28 16:33:29 | D | + quant_dtype: sint4 +25-08-28 16:33:29 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:29 | D | + scale_dtype: (None,) +25-08-28 16:33:29 | D | - Quantizing transformer_blocks.13.attn.add_q_proj.weight +25-08-28 16:33:29 | D | + quant_dtype: sint4 +25-08-28 16:33:29 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:29 | D | + scale_dtype: (None,) +25-08-28 16:33:29 | D | - Quantizing transformer_blocks.13.attn.add_k_proj.weight +25-08-28 16:33:29 | D | + quant_dtype: sint4 +25-08-28 16:33:29 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:29 | D | + scale_dtype: (None,) +25-08-28 16:33:29 | D | - Quantizing transformer_blocks.13.attn.add_v_proj.weight +25-08-28 16:33:29 | D | + quant_dtype: sint4 +25-08-28 16:33:29 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:29 | D | + scale_dtype: (None,) +25-08-28 16:33:30 | D | - Quantizing transformer_blocks.13.attn.to_out.0.weight +25-08-28 16:33:30 | D | + quant_dtype: sint4 +25-08-28 16:33:30 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:30 | D | + scale_dtype: (None,) +25-08-28 16:33:30 | D | - Quantizing transformer_blocks.13.attn.to_add_out.weight +25-08-28 16:33:30 | D | + quant_dtype: sint4 +25-08-28 16:33:30 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:30 | D | + scale_dtype: (None,) +25-08-28 16:33:30 | D | - Quantizing transformer_blocks.13.ff.net.0.proj.weight +25-08-28 16:33:30 | D | + quant_dtype: sint4 +25-08-28 16:33:30 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:30 | D | + scale_dtype: (None,) +25-08-28 16:33:30 | D | - Quantizing transformer_blocks.13.ff.net.2.linear.weight +25-08-28 16:33:30 | D | + quant_dtype: sint4 +25-08-28 16:33:30 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:30 | D | + scale_dtype: (None,) +25-08-28 16:33:30 | D | - Quantizing transformer_blocks.13.ff_context.net.0.proj.weight +25-08-28 16:33:30 | D | + quant_dtype: sint4 +25-08-28 16:33:30 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:30 | D | + scale_dtype: (None,) +25-08-28 16:33:31 | D | - Quantizing transformer_blocks.13.ff_context.net.2.linear.weight +25-08-28 16:33:31 | D | + quant_dtype: sint4 +25-08-28 16:33:31 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:31 | D | + scale_dtype: (None,) +25-08-28 16:33:31 | D | - Quantizing weights: block transformer_blocks.14 +25-08-28 16:33:31 | D | - Quantizing transformer_blocks.14.norm1.linear.weight +25-08-28 16:33:31 | D | + quant_dtype: sint4 +25-08-28 16:33:31 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:31 | D | + scale_dtype: (None,) +25-08-28 16:33:31 | D | - Quantizing transformer_blocks.14.norm1_context.linear.weight +25-08-28 16:33:31 | D | + quant_dtype: sint4 +25-08-28 16:33:31 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:31 | D | + scale_dtype: (None,) +25-08-28 16:33:31 | D | - Quantizing transformer_blocks.14.attn.to_q.weight +25-08-28 16:33:31 | D | + quant_dtype: sint4 +25-08-28 16:33:31 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:31 | D | + scale_dtype: (None,) +25-08-28 16:33:31 | D | - Quantizing transformer_blocks.14.attn.to_k.weight +25-08-28 16:33:31 | D | + quant_dtype: sint4 +25-08-28 16:33:31 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:31 | D | + scale_dtype: (None,) +25-08-28 16:33:32 | D | - Quantizing transformer_blocks.14.attn.to_v.weight +25-08-28 16:33:32 | D | + quant_dtype: sint4 +25-08-28 16:33:32 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:32 | D | + scale_dtype: (None,) +25-08-28 16:33:32 | D | - Quantizing transformer_blocks.14.attn.add_q_proj.weight +25-08-28 16:33:32 | D | + quant_dtype: sint4 +25-08-28 16:33:32 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:32 | D | + scale_dtype: (None,) +25-08-28 16:33:32 | D | - Quantizing transformer_blocks.14.attn.add_k_proj.weight +25-08-28 16:33:32 | D | + quant_dtype: sint4 +25-08-28 16:33:32 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:32 | D | + scale_dtype: (None,) +25-08-28 16:33:32 | D | - Quantizing transformer_blocks.14.attn.add_v_proj.weight +25-08-28 16:33:32 | D | + quant_dtype: sint4 +25-08-28 16:33:32 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:32 | D | + scale_dtype: (None,) +25-08-28 16:33:32 | D | - Quantizing transformer_blocks.14.attn.to_out.0.weight +25-08-28 16:33:32 | D | + quant_dtype: sint4 +25-08-28 16:33:32 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:32 | D | + scale_dtype: (None,) +25-08-28 16:33:33 | D | - Quantizing transformer_blocks.14.attn.to_add_out.weight +25-08-28 16:33:33 | D | + quant_dtype: sint4 +25-08-28 16:33:33 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:33 | D | + scale_dtype: (None,) +25-08-28 16:33:33 | D | - Quantizing transformer_blocks.14.ff.net.0.proj.weight +25-08-28 16:33:33 | D | + quant_dtype: sint4 +25-08-28 16:33:33 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:33 | D | + scale_dtype: (None,) +25-08-28 16:33:33 | D | - Quantizing transformer_blocks.14.ff.net.2.linear.weight +25-08-28 16:33:33 | D | + quant_dtype: sint4 +25-08-28 16:33:33 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:33 | D | + scale_dtype: (None,) +25-08-28 16:33:33 | D | - Quantizing transformer_blocks.14.ff_context.net.0.proj.weight +25-08-28 16:33:33 | D | + quant_dtype: sint4 +25-08-28 16:33:33 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:33 | D | + scale_dtype: (None,) +25-08-28 16:33:33 | D | - Quantizing transformer_blocks.14.ff_context.net.2.linear.weight +25-08-28 16:33:33 | D | + quant_dtype: sint4 +25-08-28 16:33:33 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:33 | D | + scale_dtype: (None,) +25-08-28 16:33:33 | D | - Quantizing weights: block transformer_blocks.15 +25-08-28 16:33:33 | D | - Quantizing transformer_blocks.15.norm1.linear.weight +25-08-28 16:33:33 | D | + quant_dtype: sint4 +25-08-28 16:33:33 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:33 | D | + scale_dtype: (None,) +25-08-28 16:33:34 | D | - Quantizing transformer_blocks.15.norm1_context.linear.weight +25-08-28 16:33:34 | D | + quant_dtype: sint4 +25-08-28 16:33:34 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:34 | D | + scale_dtype: (None,) +25-08-28 16:33:34 | D | - Quantizing transformer_blocks.15.attn.to_q.weight +25-08-28 16:33:34 | D | + quant_dtype: sint4 +25-08-28 16:33:34 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:34 | D | + scale_dtype: (None,) +25-08-28 16:33:34 | D | - Quantizing transformer_blocks.15.attn.to_k.weight +25-08-28 16:33:34 | D | + quant_dtype: sint4 +25-08-28 16:33:34 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:34 | D | + scale_dtype: (None,) +25-08-28 16:33:34 | D | - Quantizing transformer_blocks.15.attn.to_v.weight +25-08-28 16:33:34 | D | + quant_dtype: sint4 +25-08-28 16:33:34 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:34 | D | + scale_dtype: (None,) +25-08-28 16:33:34 | D | - Quantizing transformer_blocks.15.attn.add_q_proj.weight +25-08-28 16:33:34 | D | + quant_dtype: sint4 +25-08-28 16:33:34 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:34 | D | + scale_dtype: (None,) +25-08-28 16:33:35 | D | - Quantizing transformer_blocks.15.attn.add_k_proj.weight +25-08-28 16:33:35 | D | + quant_dtype: sint4 +25-08-28 16:33:35 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:35 | D | + scale_dtype: (None,) +25-08-28 16:33:35 | D | - Quantizing transformer_blocks.15.attn.add_v_proj.weight +25-08-28 16:33:35 | D | + quant_dtype: sint4 +25-08-28 16:33:35 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:35 | D | + scale_dtype: (None,) +25-08-28 16:33:35 | D | - Quantizing transformer_blocks.15.attn.to_out.0.weight +25-08-28 16:33:35 | D | + quant_dtype: sint4 +25-08-28 16:33:35 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:35 | D | + scale_dtype: (None,) +25-08-28 16:33:35 | D | - Quantizing transformer_blocks.15.attn.to_add_out.weight +25-08-28 16:33:35 | D | + quant_dtype: sint4 +25-08-28 16:33:35 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:35 | D | + scale_dtype: (None,) +25-08-28 16:33:35 | D | - Quantizing transformer_blocks.15.ff.net.0.proj.weight +25-08-28 16:33:35 | D | + quant_dtype: sint4 +25-08-28 16:33:35 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:35 | D | + scale_dtype: (None,) +25-08-28 16:33:36 | D | - Quantizing transformer_blocks.15.ff.net.2.linear.weight +25-08-28 16:33:36 | D | + quant_dtype: sint4 +25-08-28 16:33:36 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:36 | D | + scale_dtype: (None,) +25-08-28 16:33:36 | D | - Quantizing transformer_blocks.15.ff_context.net.0.proj.weight +25-08-28 16:33:36 | D | + quant_dtype: sint4 +25-08-28 16:33:36 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:36 | D | + scale_dtype: (None,) +25-08-28 16:33:36 | D | - Quantizing transformer_blocks.15.ff_context.net.2.linear.weight +25-08-28 16:33:36 | D | + quant_dtype: sint4 +25-08-28 16:33:36 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:36 | D | + scale_dtype: (None,) +25-08-28 16:33:36 | D | - Quantizing weights: block transformer_blocks.16 +25-08-28 16:33:36 | D | - Quantizing transformer_blocks.16.norm1.linear.weight +25-08-28 16:33:36 | D | + quant_dtype: sint4 +25-08-28 16:33:36 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:36 | D | + scale_dtype: (None,) +25-08-28 16:33:36 | D | - Quantizing transformer_blocks.16.norm1_context.linear.weight +25-08-28 16:33:36 | D | + quant_dtype: sint4 +25-08-28 16:33:36 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:36 | D | + scale_dtype: (None,) +25-08-28 16:33:36 | D | - Quantizing transformer_blocks.16.attn.to_q.weight +25-08-28 16:33:36 | D | + quant_dtype: sint4 +25-08-28 16:33:36 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:36 | D | + scale_dtype: (None,) +25-08-28 16:33:37 | D | - Quantizing transformer_blocks.16.attn.to_k.weight +25-08-28 16:33:37 | D | + quant_dtype: sint4 +25-08-28 16:33:37 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:37 | D | + scale_dtype: (None,) +25-08-28 16:33:37 | D | - Quantizing transformer_blocks.16.attn.to_v.weight +25-08-28 16:33:37 | D | + quant_dtype: sint4 +25-08-28 16:33:37 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:37 | D | + scale_dtype: (None,) +25-08-28 16:33:37 | D | - Quantizing transformer_blocks.16.attn.add_q_proj.weight +25-08-28 16:33:37 | D | + quant_dtype: sint4 +25-08-28 16:33:37 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:37 | D | + scale_dtype: (None,) +25-08-28 16:33:37 | D | - Quantizing transformer_blocks.16.attn.add_k_proj.weight +25-08-28 16:33:37 | D | + quant_dtype: sint4 +25-08-28 16:33:37 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:37 | D | + scale_dtype: (None,) +25-08-28 16:33:37 | D | - Quantizing transformer_blocks.16.attn.add_v_proj.weight +25-08-28 16:33:37 | D | + quant_dtype: sint4 +25-08-28 16:33:37 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:37 | D | + scale_dtype: (None,) +25-08-28 16:33:38 | D | - Quantizing transformer_blocks.16.attn.to_out.0.weight +25-08-28 16:33:38 | D | + quant_dtype: sint4 +25-08-28 16:33:38 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:38 | D | + scale_dtype: (None,) +25-08-28 16:33:38 | D | - Quantizing transformer_blocks.16.attn.to_add_out.weight +25-08-28 16:33:38 | D | + quant_dtype: sint4 +25-08-28 16:33:38 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:38 | D | + scale_dtype: (None,) +25-08-28 16:33:38 | D | - Quantizing transformer_blocks.16.ff.net.0.proj.weight +25-08-28 16:33:38 | D | + quant_dtype: sint4 +25-08-28 16:33:38 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:38 | D | + scale_dtype: (None,) +25-08-28 16:33:38 | D | - Quantizing transformer_blocks.16.ff.net.2.linear.weight +25-08-28 16:33:38 | D | + quant_dtype: sint4 +25-08-28 16:33:38 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:38 | D | + scale_dtype: (None,) +25-08-28 16:33:38 | D | - Quantizing transformer_blocks.16.ff_context.net.0.proj.weight +25-08-28 16:33:38 | D | + quant_dtype: sint4 +25-08-28 16:33:38 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:38 | D | + scale_dtype: (None,) +25-08-28 16:33:39 | D | - Quantizing transformer_blocks.16.ff_context.net.2.linear.weight +25-08-28 16:33:39 | D | + quant_dtype: sint4 +25-08-28 16:33:39 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:39 | D | + scale_dtype: (None,) +25-08-28 16:33:39 | D | - Quantizing weights: block transformer_blocks.17 +25-08-28 16:33:39 | D | - Quantizing transformer_blocks.17.norm1.linear.weight +25-08-28 16:33:39 | D | + quant_dtype: sint4 +25-08-28 16:33:39 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:39 | D | + scale_dtype: (None,) +25-08-28 16:33:39 | D | - Quantizing transformer_blocks.17.norm1_context.linear.weight +25-08-28 16:33:39 | D | + quant_dtype: sint4 +25-08-28 16:33:39 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:39 | D | + scale_dtype: (None,) +25-08-28 16:33:39 | D | - Quantizing transformer_blocks.17.attn.to_q.weight +25-08-28 16:33:39 | D | + quant_dtype: sint4 +25-08-28 16:33:39 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:39 | D | + scale_dtype: (None,) +25-08-28 16:33:39 | D | - Quantizing transformer_blocks.17.attn.to_k.weight +25-08-28 16:33:39 | D | + quant_dtype: sint4 +25-08-28 16:33:39 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:39 | D | + scale_dtype: (None,) +25-08-28 16:33:40 | D | - Quantizing transformer_blocks.17.attn.to_v.weight +25-08-28 16:33:40 | D | + quant_dtype: sint4 +25-08-28 16:33:40 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:40 | D | + scale_dtype: (None,) +25-08-28 16:33:40 | D | - Quantizing transformer_blocks.17.attn.add_q_proj.weight +25-08-28 16:33:40 | D | + quant_dtype: sint4 +25-08-28 16:33:40 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:40 | D | + scale_dtype: (None,) +25-08-28 16:33:40 | D | - Quantizing transformer_blocks.17.attn.add_k_proj.weight +25-08-28 16:33:40 | D | + quant_dtype: sint4 +25-08-28 16:33:40 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:40 | D | + scale_dtype: (None,) +25-08-28 16:33:40 | D | - Quantizing transformer_blocks.17.attn.add_v_proj.weight +25-08-28 16:33:40 | D | + quant_dtype: sint4 +25-08-28 16:33:40 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:40 | D | + scale_dtype: (None,) +25-08-28 16:33:40 | D | - Quantizing transformer_blocks.17.attn.to_out.0.weight +25-08-28 16:33:40 | D | + quant_dtype: sint4 +25-08-28 16:33:40 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:40 | D | + scale_dtype: (None,) +25-08-28 16:33:40 | D | - Quantizing transformer_blocks.17.attn.to_add_out.weight +25-08-28 16:33:40 | D | + quant_dtype: sint4 +25-08-28 16:33:40 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:40 | D | + scale_dtype: (None,) +25-08-28 16:33:41 | D | - Quantizing transformer_blocks.17.ff.net.0.proj.weight +25-08-28 16:33:41 | D | + quant_dtype: sint4 +25-08-28 16:33:41 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:41 | D | + scale_dtype: (None,) +25-08-28 16:33:41 | D | - Quantizing transformer_blocks.17.ff.net.2.linear.weight +25-08-28 16:33:41 | D | + quant_dtype: sint4 +25-08-28 16:33:41 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:41 | D | + scale_dtype: (None,) +25-08-28 16:33:41 | D | - Quantizing transformer_blocks.17.ff_context.net.0.proj.weight +25-08-28 16:33:41 | D | + quant_dtype: sint4 +25-08-28 16:33:41 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:41 | D | + scale_dtype: (None,) +25-08-28 16:33:41 | D | - Quantizing transformer_blocks.17.ff_context.net.2.linear.weight +25-08-28 16:33:41 | D | + quant_dtype: sint4 +25-08-28 16:33:41 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:41 | D | + scale_dtype: (None,) +25-08-28 16:33:41 | D | - Quantizing weights: block transformer_blocks.18 +25-08-28 16:33:41 | D | - Quantizing transformer_blocks.18.norm1.linear.weight +25-08-28 16:33:41 | D | + quant_dtype: sint4 +25-08-28 16:33:41 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:41 | D | + scale_dtype: (None,) +25-08-28 16:33:42 | D | - Quantizing transformer_blocks.18.norm1_context.linear.weight +25-08-28 16:33:42 | D | + quant_dtype: sint4 +25-08-28 16:33:42 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:42 | D | + scale_dtype: (None,) +25-08-28 16:33:42 | D | - Quantizing transformer_blocks.18.attn.to_q.weight +25-08-28 16:33:42 | D | + quant_dtype: sint4 +25-08-28 16:33:42 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:42 | D | + scale_dtype: (None,) +25-08-28 16:33:42 | D | - Quantizing transformer_blocks.18.attn.to_k.weight +25-08-28 16:33:42 | D | + quant_dtype: sint4 +25-08-28 16:33:42 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:42 | D | + scale_dtype: (None,) +25-08-28 16:33:42 | D | - Quantizing transformer_blocks.18.attn.to_v.weight +25-08-28 16:33:42 | D | + quant_dtype: sint4 +25-08-28 16:33:42 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:42 | D | + scale_dtype: (None,) +25-08-28 16:33:42 | D | - Quantizing transformer_blocks.18.attn.add_q_proj.weight +25-08-28 16:33:42 | D | + quant_dtype: sint4 +25-08-28 16:33:42 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:42 | D | + scale_dtype: (None,) +25-08-28 16:33:43 | D | - Quantizing transformer_blocks.18.attn.add_k_proj.weight +25-08-28 16:33:43 | D | + quant_dtype: sint4 +25-08-28 16:33:43 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:43 | D | + scale_dtype: (None,) +25-08-28 16:33:43 | D | - Quantizing transformer_blocks.18.attn.add_v_proj.weight +25-08-28 16:33:43 | D | + quant_dtype: sint4 +25-08-28 16:33:43 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:43 | D | + scale_dtype: (None,) +25-08-28 16:33:43 | D | - Quantizing transformer_blocks.18.attn.to_out.0.weight +25-08-28 16:33:43 | D | + quant_dtype: sint4 +25-08-28 16:33:43 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:43 | D | + scale_dtype: (None,) +25-08-28 16:33:43 | D | - Quantizing transformer_blocks.18.attn.to_add_out.weight +25-08-28 16:33:43 | D | + quant_dtype: sint4 +25-08-28 16:33:43 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:43 | D | + scale_dtype: (None,) +25-08-28 16:33:43 | D | - Quantizing transformer_blocks.18.ff.net.0.proj.weight +25-08-28 16:33:43 | D | + quant_dtype: sint4 +25-08-28 16:33:43 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:43 | D | + scale_dtype: (None,) +25-08-28 16:33:44 | D | - Quantizing transformer_blocks.18.ff.net.2.linear.weight +25-08-28 16:33:44 | D | + quant_dtype: sint4 +25-08-28 16:33:44 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:44 | D | + scale_dtype: (None,) +25-08-28 16:33:44 | D | - Quantizing transformer_blocks.18.ff_context.net.0.proj.weight +25-08-28 16:33:44 | D | + quant_dtype: sint4 +25-08-28 16:33:44 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:44 | D | + scale_dtype: (None,) +25-08-28 16:33:44 | D | - Quantizing transformer_blocks.18.ff_context.net.2.linear.weight +25-08-28 16:33:44 | D | + quant_dtype: sint4 +25-08-28 16:33:44 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:44 | D | + scale_dtype: (None,) +25-08-28 16:33:44 | D | - Quantizing weights: block single_transformer_blocks.0 +25-08-28 16:33:44 | D | - Quantizing single_transformer_blocks.0.norm.linear.weight +25-08-28 16:33:44 | D | + quant_dtype: sint4 +25-08-28 16:33:44 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:44 | D | + scale_dtype: (None,) +25-08-28 16:33:44 | D | - Quantizing single_transformer_blocks.0.attn.to_q.weight +25-08-28 16:33:44 | D | + quant_dtype: sint4 +25-08-28 16:33:44 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:44 | D | + scale_dtype: (None,) +25-08-28 16:33:44 | D | - Quantizing single_transformer_blocks.0.attn.to_k.weight +25-08-28 16:33:44 | D | + quant_dtype: sint4 +25-08-28 16:33:44 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:44 | D | + scale_dtype: (None,) +25-08-28 16:33:45 | D | - Quantizing single_transformer_blocks.0.attn.to_v.weight +25-08-28 16:33:45 | D | + quant_dtype: sint4 +25-08-28 16:33:45 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:45 | D | + scale_dtype: (None,) +25-08-28 16:33:45 | D | - Quantizing single_transformer_blocks.0.proj_out.linears.0.weight +25-08-28 16:33:45 | D | + quant_dtype: sint4 +25-08-28 16:33:45 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:45 | D | + scale_dtype: (None,) +25-08-28 16:33:45 | D | - Quantizing single_transformer_blocks.0.proj_mlp.weight +25-08-28 16:33:45 | D | + quant_dtype: sint4 +25-08-28 16:33:45 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:45 | D | + scale_dtype: (None,) +25-08-28 16:33:45 | D | - Quantizing single_transformer_blocks.0.proj_out.linears.1.linear.weight +25-08-28 16:33:45 | D | + quant_dtype: sint4 +25-08-28 16:33:45 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:45 | D | + scale_dtype: (None,) +25-08-28 16:33:45 | D | - Quantizing weights: block single_transformer_blocks.1 +25-08-28 16:33:45 | D | - Quantizing single_transformer_blocks.1.norm.linear.weight +25-08-28 16:33:45 | D | + quant_dtype: sint4 +25-08-28 16:33:45 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:45 | D | + scale_dtype: (None,) +25-08-28 16:33:46 | D | - Quantizing single_transformer_blocks.1.attn.to_q.weight +25-08-28 16:33:46 | D | + quant_dtype: sint4 +25-08-28 16:33:46 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:46 | D | + scale_dtype: (None,) +25-08-28 16:33:46 | D | - Quantizing single_transformer_blocks.1.attn.to_k.weight +25-08-28 16:33:46 | D | + quant_dtype: sint4 +25-08-28 16:33:46 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:46 | D | + scale_dtype: (None,) +25-08-28 16:33:46 | D | - Quantizing single_transformer_blocks.1.attn.to_v.weight +25-08-28 16:33:46 | D | + quant_dtype: sint4 +25-08-28 16:33:46 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:46 | D | + scale_dtype: (None,) +25-08-28 16:33:46 | D | - Quantizing single_transformer_blocks.1.proj_out.linears.0.weight +25-08-28 16:33:46 | D | + quant_dtype: sint4 +25-08-28 16:33:46 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:46 | D | + scale_dtype: (None,) +25-08-28 16:33:46 | D | - Quantizing single_transformer_blocks.1.proj_mlp.weight +25-08-28 16:33:46 | D | + quant_dtype: sint4 +25-08-28 16:33:46 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:46 | D | + scale_dtype: (None,) +25-08-28 16:33:47 | D | - Quantizing single_transformer_blocks.1.proj_out.linears.1.linear.weight +25-08-28 16:33:47 | D | + quant_dtype: sint4 +25-08-28 16:33:47 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:47 | D | + scale_dtype: (None,) +25-08-28 16:33:47 | D | - Quantizing weights: block single_transformer_blocks.2 +25-08-28 16:33:47 | D | - Quantizing single_transformer_blocks.2.norm.linear.weight +25-08-28 16:33:47 | D | + quant_dtype: sint4 +25-08-28 16:33:47 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:47 | D | + scale_dtype: (None,) +25-08-28 16:33:47 | D | - Quantizing single_transformer_blocks.2.attn.to_q.weight +25-08-28 16:33:47 | D | + quant_dtype: sint4 +25-08-28 16:33:47 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:47 | D | + scale_dtype: (None,) +25-08-28 16:33:47 | D | - Quantizing single_transformer_blocks.2.attn.to_k.weight +25-08-28 16:33:47 | D | + quant_dtype: sint4 +25-08-28 16:33:47 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:47 | D | + scale_dtype: (None,) +25-08-28 16:33:47 | D | - Quantizing single_transformer_blocks.2.attn.to_v.weight +25-08-28 16:33:47 | D | + quant_dtype: sint4 +25-08-28 16:33:47 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:47 | D | + scale_dtype: (None,) +25-08-28 16:33:48 | D | - Quantizing single_transformer_blocks.2.proj_out.linears.0.weight +25-08-28 16:33:48 | D | + quant_dtype: sint4 +25-08-28 16:33:48 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:48 | D | + scale_dtype: (None,) +25-08-28 16:33:48 | D | - Quantizing single_transformer_blocks.2.proj_mlp.weight +25-08-28 16:33:48 | D | + quant_dtype: sint4 +25-08-28 16:33:48 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:48 | D | + scale_dtype: (None,) +25-08-28 16:33:48 | D | - Quantizing single_transformer_blocks.2.proj_out.linears.1.linear.weight +25-08-28 16:33:48 | D | + quant_dtype: sint4 +25-08-28 16:33:48 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:48 | D | + scale_dtype: (None,) +25-08-28 16:33:48 | D | - Quantizing weights: block single_transformer_blocks.3 +25-08-28 16:33:48 | D | - Quantizing single_transformer_blocks.3.norm.linear.weight +25-08-28 16:33:48 | D | + quant_dtype: sint4 +25-08-28 16:33:48 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:48 | D | + scale_dtype: (None,) +25-08-28 16:33:48 | D | - Quantizing single_transformer_blocks.3.attn.to_q.weight +25-08-28 16:33:48 | D | + quant_dtype: sint4 +25-08-28 16:33:48 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:48 | D | + scale_dtype: (None,) +25-08-28 16:33:48 | D | - Quantizing single_transformer_blocks.3.attn.to_k.weight +25-08-28 16:33:48 | D | + quant_dtype: sint4 +25-08-28 16:33:48 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:48 | D | + scale_dtype: (None,) +25-08-28 16:33:49 | D | - Quantizing single_transformer_blocks.3.attn.to_v.weight +25-08-28 16:33:49 | D | + quant_dtype: sint4 +25-08-28 16:33:49 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:49 | D | + scale_dtype: (None,) +25-08-28 16:33:49 | D | - Quantizing single_transformer_blocks.3.proj_out.linears.0.weight +25-08-28 16:33:49 | D | + quant_dtype: sint4 +25-08-28 16:33:49 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:49 | D | + scale_dtype: (None,) +25-08-28 16:33:49 | D | - Quantizing single_transformer_blocks.3.proj_mlp.weight +25-08-28 16:33:49 | D | + quant_dtype: sint4 +25-08-28 16:33:49 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:49 | D | + scale_dtype: (None,) +25-08-28 16:33:49 | D | - Quantizing single_transformer_blocks.3.proj_out.linears.1.linear.weight +25-08-28 16:33:49 | D | + quant_dtype: sint4 +25-08-28 16:33:49 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:49 | D | + scale_dtype: (None,) +25-08-28 16:33:49 | D | - Quantizing weights: block single_transformer_blocks.4 +25-08-28 16:33:49 | D | - Quantizing single_transformer_blocks.4.norm.linear.weight +25-08-28 16:33:49 | D | + quant_dtype: sint4 +25-08-28 16:33:49 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:49 | D | + scale_dtype: (None,) +25-08-28 16:33:50 | D | - Quantizing single_transformer_blocks.4.attn.to_q.weight +25-08-28 16:33:50 | D | + quant_dtype: sint4 +25-08-28 16:33:50 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:50 | D | + scale_dtype: (None,) +25-08-28 16:33:50 | D | - Quantizing single_transformer_blocks.4.attn.to_k.weight +25-08-28 16:33:50 | D | + quant_dtype: sint4 +25-08-28 16:33:50 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:50 | D | + scale_dtype: (None,) +25-08-28 16:33:50 | D | - Quantizing single_transformer_blocks.4.attn.to_v.weight +25-08-28 16:33:50 | D | + quant_dtype: sint4 +25-08-28 16:33:50 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:50 | D | + scale_dtype: (None,) +25-08-28 16:33:50 | D | - Quantizing single_transformer_blocks.4.proj_out.linears.0.weight +25-08-28 16:33:50 | D | + quant_dtype: sint4 +25-08-28 16:33:50 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:50 | D | + scale_dtype: (None,) +25-08-28 16:33:50 | D | - Quantizing single_transformer_blocks.4.proj_mlp.weight +25-08-28 16:33:50 | D | + quant_dtype: sint4 +25-08-28 16:33:50 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:50 | D | + scale_dtype: (None,) +25-08-28 16:33:51 | D | - Quantizing single_transformer_blocks.4.proj_out.linears.1.linear.weight +25-08-28 16:33:51 | D | + quant_dtype: sint4 +25-08-28 16:33:51 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:51 | D | + scale_dtype: (None,) +25-08-28 16:33:51 | D | - Quantizing weights: block single_transformer_blocks.5 +25-08-28 16:33:51 | D | - Quantizing single_transformer_blocks.5.norm.linear.weight +25-08-28 16:33:51 | D | + quant_dtype: sint4 +25-08-28 16:33:51 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:51 | D | + scale_dtype: (None,) +25-08-28 16:33:51 | D | - Quantizing single_transformer_blocks.5.attn.to_q.weight +25-08-28 16:33:51 | D | + quant_dtype: sint4 +25-08-28 16:33:51 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:51 | D | + scale_dtype: (None,) +25-08-28 16:33:51 | D | - Quantizing single_transformer_blocks.5.attn.to_k.weight +25-08-28 16:33:51 | D | + quant_dtype: sint4 +25-08-28 16:33:51 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:51 | D | + scale_dtype: (None,) +25-08-28 16:33:51 | D | - Quantizing single_transformer_blocks.5.attn.to_v.weight +25-08-28 16:33:51 | D | + quant_dtype: sint4 +25-08-28 16:33:51 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:51 | D | + scale_dtype: (None,) +25-08-28 16:33:52 | D | - Quantizing single_transformer_blocks.5.proj_out.linears.0.weight +25-08-28 16:33:52 | D | + quant_dtype: sint4 +25-08-28 16:33:52 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:52 | D | + scale_dtype: (None,) +25-08-28 16:33:52 | D | - Quantizing single_transformer_blocks.5.proj_mlp.weight +25-08-28 16:33:52 | D | + quant_dtype: sint4 +25-08-28 16:33:52 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:52 | D | + scale_dtype: (None,) +25-08-28 16:33:52 | D | - Quantizing single_transformer_blocks.5.proj_out.linears.1.linear.weight +25-08-28 16:33:52 | D | + quant_dtype: sint4 +25-08-28 16:33:52 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:52 | D | + scale_dtype: (None,) +25-08-28 16:33:52 | D | - Quantizing weights: block single_transformer_blocks.6 +25-08-28 16:33:52 | D | - Quantizing single_transformer_blocks.6.norm.linear.weight +25-08-28 16:33:52 | D | + quant_dtype: sint4 +25-08-28 16:33:52 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:52 | D | + scale_dtype: (None,) +25-08-28 16:33:52 | D | - Quantizing single_transformer_blocks.6.attn.to_q.weight +25-08-28 16:33:52 | D | + quant_dtype: sint4 +25-08-28 16:33:52 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:52 | D | + scale_dtype: (None,) +25-08-28 16:33:53 | D | - Quantizing single_transformer_blocks.6.attn.to_k.weight +25-08-28 16:33:53 | D | + quant_dtype: sint4 +25-08-28 16:33:53 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:53 | D | + scale_dtype: (None,) +25-08-28 16:33:53 | D | - Quantizing single_transformer_blocks.6.attn.to_v.weight +25-08-28 16:33:53 | D | + quant_dtype: sint4 +25-08-28 16:33:53 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:53 | D | + scale_dtype: (None,) +25-08-28 16:33:53 | D | - Quantizing single_transformer_blocks.6.proj_out.linears.0.weight +25-08-28 16:33:53 | D | + quant_dtype: sint4 +25-08-28 16:33:53 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:53 | D | + scale_dtype: (None,) +25-08-28 16:33:53 | D | - Quantizing single_transformer_blocks.6.proj_mlp.weight +25-08-28 16:33:53 | D | + quant_dtype: sint4 +25-08-28 16:33:53 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:53 | D | + scale_dtype: (None,) +25-08-28 16:33:53 | D | - Quantizing single_transformer_blocks.6.proj_out.linears.1.linear.weight +25-08-28 16:33:53 | D | + quant_dtype: sint4 +25-08-28 16:33:53 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:53 | D | + scale_dtype: (None,) +25-08-28 16:33:53 | D | - Quantizing weights: block single_transformer_blocks.7 +25-08-28 16:33:53 | D | - Quantizing single_transformer_blocks.7.norm.linear.weight +25-08-28 16:33:53 | D | + quant_dtype: sint4 +25-08-28 16:33:53 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:53 | D | + scale_dtype: (None,) +25-08-28 16:33:54 | D | - Quantizing single_transformer_blocks.7.attn.to_q.weight +25-08-28 16:33:54 | D | + quant_dtype: sint4 +25-08-28 16:33:54 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:54 | D | + scale_dtype: (None,) +25-08-28 16:33:54 | D | - Quantizing single_transformer_blocks.7.attn.to_k.weight +25-08-28 16:33:54 | D | + quant_dtype: sint4 +25-08-28 16:33:54 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:54 | D | + scale_dtype: (None,) +25-08-28 16:33:54 | D | - Quantizing single_transformer_blocks.7.attn.to_v.weight +25-08-28 16:33:54 | D | + quant_dtype: sint4 +25-08-28 16:33:54 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:54 | D | + scale_dtype: (None,) +25-08-28 16:33:54 | D | - Quantizing single_transformer_blocks.7.proj_out.linears.0.weight +25-08-28 16:33:54 | D | + quant_dtype: sint4 +25-08-28 16:33:54 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:54 | D | + scale_dtype: (None,) +25-08-28 16:33:54 | D | - Quantizing single_transformer_blocks.7.proj_mlp.weight +25-08-28 16:33:54 | D | + quant_dtype: sint4 +25-08-28 16:33:54 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:54 | D | + scale_dtype: (None,) +25-08-28 16:33:55 | D | - Quantizing single_transformer_blocks.7.proj_out.linears.1.linear.weight +25-08-28 16:33:55 | D | + quant_dtype: sint4 +25-08-28 16:33:55 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:55 | D | + scale_dtype: (None,) +25-08-28 16:33:55 | D | - Quantizing weights: block single_transformer_blocks.8 +25-08-28 16:33:55 | D | - Quantizing single_transformer_blocks.8.norm.linear.weight +25-08-28 16:33:55 | D | + quant_dtype: sint4 +25-08-28 16:33:55 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:55 | D | + scale_dtype: (None,) +25-08-28 16:33:55 | D | - Quantizing single_transformer_blocks.8.attn.to_q.weight +25-08-28 16:33:55 | D | + quant_dtype: sint4 +25-08-28 16:33:55 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:55 | D | + scale_dtype: (None,) +25-08-28 16:33:55 | D | - Quantizing single_transformer_blocks.8.attn.to_k.weight +25-08-28 16:33:55 | D | + quant_dtype: sint4 +25-08-28 16:33:55 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:55 | D | + scale_dtype: (None,) +25-08-28 16:33:55 | D | - Quantizing single_transformer_blocks.8.attn.to_v.weight +25-08-28 16:33:55 | D | + quant_dtype: sint4 +25-08-28 16:33:55 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:55 | D | + scale_dtype: (None,) +25-08-28 16:33:56 | D | - Quantizing single_transformer_blocks.8.proj_out.linears.0.weight +25-08-28 16:33:56 | D | + quant_dtype: sint4 +25-08-28 16:33:56 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:56 | D | + scale_dtype: (None,) +25-08-28 16:33:56 | D | - Quantizing single_transformer_blocks.8.proj_mlp.weight +25-08-28 16:33:56 | D | + quant_dtype: sint4 +25-08-28 16:33:56 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:56 | D | + scale_dtype: (None,) +25-08-28 16:33:56 | D | - Quantizing single_transformer_blocks.8.proj_out.linears.1.linear.weight +25-08-28 16:33:56 | D | + quant_dtype: sint4 +25-08-28 16:33:56 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:56 | D | + scale_dtype: (None,) +25-08-28 16:33:56 | D | - Quantizing weights: block single_transformer_blocks.9 +25-08-28 16:33:56 | D | - Quantizing single_transformer_blocks.9.norm.linear.weight +25-08-28 16:33:56 | D | + quant_dtype: sint4 +25-08-28 16:33:56 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:56 | D | + scale_dtype: (None,) +25-08-28 16:33:56 | D | - Quantizing single_transformer_blocks.9.attn.to_q.weight +25-08-28 16:33:56 | D | + quant_dtype: sint4 +25-08-28 16:33:56 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:56 | D | + scale_dtype: (None,) +25-08-28 16:33:56 | D | - Quantizing single_transformer_blocks.9.attn.to_k.weight +25-08-28 16:33:56 | D | + quant_dtype: sint4 +25-08-28 16:33:56 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:56 | D | + scale_dtype: (None,) +25-08-28 16:33:57 | D | - Quantizing single_transformer_blocks.9.attn.to_v.weight +25-08-28 16:33:57 | D | + quant_dtype: sint4 +25-08-28 16:33:57 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:57 | D | + scale_dtype: (None,) +25-08-28 16:33:57 | D | - Quantizing single_transformer_blocks.9.proj_out.linears.0.weight +25-08-28 16:33:57 | D | + quant_dtype: sint4 +25-08-28 16:33:57 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:57 | D | + scale_dtype: (None,) +25-08-28 16:33:57 | D | - Quantizing single_transformer_blocks.9.proj_mlp.weight +25-08-28 16:33:57 | D | + quant_dtype: sint4 +25-08-28 16:33:57 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:57 | D | + scale_dtype: (None,) +25-08-28 16:33:57 | D | - Quantizing single_transformer_blocks.9.proj_out.linears.1.linear.weight +25-08-28 16:33:57 | D | + quant_dtype: sint4 +25-08-28 16:33:57 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:57 | D | + scale_dtype: (None,) +25-08-28 16:33:57 | D | - Quantizing weights: block single_transformer_blocks.10 +25-08-28 16:33:57 | D | - Quantizing single_transformer_blocks.10.norm.linear.weight +25-08-28 16:33:57 | D | + quant_dtype: sint4 +25-08-28 16:33:57 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:57 | D | + scale_dtype: (None,) +25-08-28 16:33:58 | D | - Quantizing single_transformer_blocks.10.attn.to_q.weight +25-08-28 16:33:58 | D | + quant_dtype: sint4 +25-08-28 16:33:58 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:58 | D | + scale_dtype: (None,) +25-08-28 16:33:58 | D | - Quantizing single_transformer_blocks.10.attn.to_k.weight +25-08-28 16:33:58 | D | + quant_dtype: sint4 +25-08-28 16:33:58 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:58 | D | + scale_dtype: (None,) +25-08-28 16:33:58 | D | - Quantizing single_transformer_blocks.10.attn.to_v.weight +25-08-28 16:33:58 | D | + quant_dtype: sint4 +25-08-28 16:33:58 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:58 | D | + scale_dtype: (None,) +25-08-28 16:33:58 | D | - Quantizing single_transformer_blocks.10.proj_out.linears.0.weight +25-08-28 16:33:58 | D | + quant_dtype: sint4 +25-08-28 16:33:58 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:58 | D | + scale_dtype: (None,) +25-08-28 16:33:58 | D | - Quantizing single_transformer_blocks.10.proj_mlp.weight +25-08-28 16:33:58 | D | + quant_dtype: sint4 +25-08-28 16:33:58 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:58 | D | + scale_dtype: (None,) +25-08-28 16:33:59 | D | - Quantizing single_transformer_blocks.10.proj_out.linears.1.linear.weight +25-08-28 16:33:59 | D | + quant_dtype: sint4 +25-08-28 16:33:59 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:59 | D | + scale_dtype: (None,) +25-08-28 16:33:59 | D | - Quantizing weights: block single_transformer_blocks.11 +25-08-28 16:33:59 | D | - Quantizing single_transformer_blocks.11.norm.linear.weight +25-08-28 16:33:59 | D | + quant_dtype: sint4 +25-08-28 16:33:59 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:59 | D | + scale_dtype: (None,) +25-08-28 16:33:59 | D | - Quantizing single_transformer_blocks.11.attn.to_q.weight +25-08-28 16:33:59 | D | + quant_dtype: sint4 +25-08-28 16:33:59 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:59 | D | + scale_dtype: (None,) +25-08-28 16:33:59 | D | - Quantizing single_transformer_blocks.11.attn.to_k.weight +25-08-28 16:33:59 | D | + quant_dtype: sint4 +25-08-28 16:33:59 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:59 | D | + scale_dtype: (None,) +25-08-28 16:33:59 | D | - Quantizing single_transformer_blocks.11.attn.to_v.weight +25-08-28 16:33:59 | D | + quant_dtype: sint4 +25-08-28 16:33:59 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:33:59 | D | + scale_dtype: (None,) +25-08-28 16:34:00 | D | - Quantizing single_transformer_blocks.11.proj_out.linears.0.weight +25-08-28 16:34:00 | D | + quant_dtype: sint4 +25-08-28 16:34:00 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:00 | D | + scale_dtype: (None,) +25-08-28 16:34:00 | D | - Quantizing single_transformer_blocks.11.proj_mlp.weight +25-08-28 16:34:00 | D | + quant_dtype: sint4 +25-08-28 16:34:00 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:00 | D | + scale_dtype: (None,) +25-08-28 16:34:00 | D | - Quantizing single_transformer_blocks.11.proj_out.linears.1.linear.weight +25-08-28 16:34:00 | D | + quant_dtype: sint4 +25-08-28 16:34:00 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:00 | D | + scale_dtype: (None,) +25-08-28 16:34:00 | D | - Quantizing weights: block single_transformer_blocks.12 +25-08-28 16:34:00 | D | - Quantizing single_transformer_blocks.12.norm.linear.weight +25-08-28 16:34:00 | D | + quant_dtype: sint4 +25-08-28 16:34:00 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:00 | D | + scale_dtype: (None,) +25-08-28 16:34:00 | D | - Quantizing single_transformer_blocks.12.attn.to_q.weight +25-08-28 16:34:00 | D | + quant_dtype: sint4 +25-08-28 16:34:00 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:00 | D | + scale_dtype: (None,) +25-08-28 16:34:01 | D | - Quantizing single_transformer_blocks.12.attn.to_k.weight +25-08-28 16:34:01 | D | + quant_dtype: sint4 +25-08-28 16:34:01 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:01 | D | + scale_dtype: (None,) +25-08-28 16:34:01 | D | - Quantizing single_transformer_blocks.12.attn.to_v.weight +25-08-28 16:34:01 | D | + quant_dtype: sint4 +25-08-28 16:34:01 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:01 | D | + scale_dtype: (None,) +25-08-28 16:34:01 | D | - Quantizing single_transformer_blocks.12.proj_out.linears.0.weight +25-08-28 16:34:01 | D | + quant_dtype: sint4 +25-08-28 16:34:01 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:01 | D | + scale_dtype: (None,) +25-08-28 16:34:01 | D | - Quantizing single_transformer_blocks.12.proj_mlp.weight +25-08-28 16:34:01 | D | + quant_dtype: sint4 +25-08-28 16:34:01 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:01 | D | + scale_dtype: (None,) +25-08-28 16:34:01 | D | - Quantizing single_transformer_blocks.12.proj_out.linears.1.linear.weight +25-08-28 16:34:01 | D | + quant_dtype: sint4 +25-08-28 16:34:01 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:01 | D | + scale_dtype: (None,) +25-08-28 16:34:01 | D | - Quantizing weights: block single_transformer_blocks.13 +25-08-28 16:34:01 | D | - Quantizing single_transformer_blocks.13.norm.linear.weight +25-08-28 16:34:01 | D | + quant_dtype: sint4 +25-08-28 16:34:01 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:01 | D | + scale_dtype: (None,) +25-08-28 16:34:02 | D | - Quantizing single_transformer_blocks.13.attn.to_q.weight +25-08-28 16:34:02 | D | + quant_dtype: sint4 +25-08-28 16:34:02 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:02 | D | + scale_dtype: (None,) +25-08-28 16:34:02 | D | - Quantizing single_transformer_blocks.13.attn.to_k.weight +25-08-28 16:34:02 | D | + quant_dtype: sint4 +25-08-28 16:34:02 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:02 | D | + scale_dtype: (None,) +25-08-28 16:34:02 | D | - Quantizing single_transformer_blocks.13.attn.to_v.weight +25-08-28 16:34:02 | D | + quant_dtype: sint4 +25-08-28 16:34:02 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:02 | D | + scale_dtype: (None,) +25-08-28 16:34:02 | D | - Quantizing single_transformer_blocks.13.proj_out.linears.0.weight +25-08-28 16:34:02 | D | + quant_dtype: sint4 +25-08-28 16:34:02 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:02 | D | + scale_dtype: (None,) +25-08-28 16:34:02 | D | - Quantizing single_transformer_blocks.13.proj_mlp.weight +25-08-28 16:34:02 | D | + quant_dtype: sint4 +25-08-28 16:34:02 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:02 | D | + scale_dtype: (None,) +25-08-28 16:34:03 | D | - Quantizing single_transformer_blocks.13.proj_out.linears.1.linear.weight +25-08-28 16:34:03 | D | + quant_dtype: sint4 +25-08-28 16:34:03 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:03 | D | + scale_dtype: (None,) +25-08-28 16:34:03 | D | - Quantizing weights: block single_transformer_blocks.14 +25-08-28 16:34:03 | D | - Quantizing single_transformer_blocks.14.norm.linear.weight +25-08-28 16:34:03 | D | + quant_dtype: sint4 +25-08-28 16:34:03 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:03 | D | + scale_dtype: (None,) +25-08-28 16:34:03 | D | - Quantizing single_transformer_blocks.14.attn.to_q.weight +25-08-28 16:34:03 | D | + quant_dtype: sint4 +25-08-28 16:34:03 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:03 | D | + scale_dtype: (None,) +25-08-28 16:34:03 | D | - Quantizing single_transformer_blocks.14.attn.to_k.weight +25-08-28 16:34:03 | D | + quant_dtype: sint4 +25-08-28 16:34:03 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:03 | D | + scale_dtype: (None,) +25-08-28 16:34:03 | D | - Quantizing single_transformer_blocks.14.attn.to_v.weight +25-08-28 16:34:03 | D | + quant_dtype: sint4 +25-08-28 16:34:03 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:03 | D | + scale_dtype: (None,) +25-08-28 16:34:04 | D | - Quantizing single_transformer_blocks.14.proj_out.linears.0.weight +25-08-28 16:34:04 | D | + quant_dtype: sint4 +25-08-28 16:34:04 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:04 | D | + scale_dtype: (None,) +25-08-28 16:34:04 | D | - Quantizing single_transformer_blocks.14.proj_mlp.weight +25-08-28 16:34:04 | D | + quant_dtype: sint4 +25-08-28 16:34:04 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:04 | D | + scale_dtype: (None,) +25-08-28 16:34:04 | D | - Quantizing single_transformer_blocks.14.proj_out.linears.1.linear.weight +25-08-28 16:34:04 | D | + quant_dtype: sint4 +25-08-28 16:34:04 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:04 | D | + scale_dtype: (None,) +25-08-28 16:34:04 | D | - Quantizing weights: block single_transformer_blocks.15 +25-08-28 16:34:04 | D | - Quantizing single_transformer_blocks.15.norm.linear.weight +25-08-28 16:34:04 | D | + quant_dtype: sint4 +25-08-28 16:34:04 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:04 | D | + scale_dtype: (None,) +25-08-28 16:34:04 | D | - Quantizing single_transformer_blocks.15.attn.to_q.weight +25-08-28 16:34:04 | D | + quant_dtype: sint4 +25-08-28 16:34:04 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:04 | D | + scale_dtype: (None,) +25-08-28 16:34:05 | D | - Quantizing single_transformer_blocks.15.attn.to_k.weight +25-08-28 16:34:05 | D | + quant_dtype: sint4 +25-08-28 16:34:05 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:05 | D | + scale_dtype: (None,) +25-08-28 16:34:05 | D | - Quantizing single_transformer_blocks.15.attn.to_v.weight +25-08-28 16:34:05 | D | + quant_dtype: sint4 +25-08-28 16:34:05 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:05 | D | + scale_dtype: (None,) +25-08-28 16:34:05 | D | - Quantizing single_transformer_blocks.15.proj_out.linears.0.weight +25-08-28 16:34:05 | D | + quant_dtype: sint4 +25-08-28 16:34:05 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:05 | D | + scale_dtype: (None,) +25-08-28 16:34:05 | D | - Quantizing single_transformer_blocks.15.proj_mlp.weight +25-08-28 16:34:05 | D | + quant_dtype: sint4 +25-08-28 16:34:05 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:05 | D | + scale_dtype: (None,) +25-08-28 16:34:05 | D | - Quantizing single_transformer_blocks.15.proj_out.linears.1.linear.weight +25-08-28 16:34:05 | D | + quant_dtype: sint4 +25-08-28 16:34:05 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:05 | D | + scale_dtype: (None,) +25-08-28 16:34:06 | D | - Quantizing weights: block single_transformer_blocks.16 +25-08-28 16:34:06 | D | - Quantizing single_transformer_blocks.16.norm.linear.weight +25-08-28 16:34:06 | D | + quant_dtype: sint4 +25-08-28 16:34:06 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:06 | D | + scale_dtype: (None,) +25-08-28 16:34:06 | D | - Quantizing single_transformer_blocks.16.attn.to_q.weight +25-08-28 16:34:06 | D | + quant_dtype: sint4 +25-08-28 16:34:06 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:06 | D | + scale_dtype: (None,) +25-08-28 16:34:06 | D | - Quantizing single_transformer_blocks.16.attn.to_k.weight +25-08-28 16:34:06 | D | + quant_dtype: sint4 +25-08-28 16:34:06 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:06 | D | + scale_dtype: (None,) +25-08-28 16:34:06 | D | - Quantizing single_transformer_blocks.16.attn.to_v.weight +25-08-28 16:34:06 | D | + quant_dtype: sint4 +25-08-28 16:34:06 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:06 | D | + scale_dtype: (None,) +25-08-28 16:34:06 | D | - Quantizing single_transformer_blocks.16.proj_out.linears.0.weight +25-08-28 16:34:06 | D | + quant_dtype: sint4 +25-08-28 16:34:06 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:06 | D | + scale_dtype: (None,) +25-08-28 16:34:06 | D | - Quantizing single_transformer_blocks.16.proj_mlp.weight +25-08-28 16:34:06 | D | + quant_dtype: sint4 +25-08-28 16:34:06 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:06 | D | + scale_dtype: (None,) +25-08-28 16:34:07 | D | - Quantizing single_transformer_blocks.16.proj_out.linears.1.linear.weight +25-08-28 16:34:07 | D | + quant_dtype: sint4 +25-08-28 16:34:07 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:07 | D | + scale_dtype: (None,) +25-08-28 16:34:07 | D | - Quantizing weights: block single_transformer_blocks.17 +25-08-28 16:34:07 | D | - Quantizing single_transformer_blocks.17.norm.linear.weight +25-08-28 16:34:07 | D | + quant_dtype: sint4 +25-08-28 16:34:07 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:07 | D | + scale_dtype: (None,) +25-08-28 16:34:07 | D | - Quantizing single_transformer_blocks.17.attn.to_q.weight +25-08-28 16:34:07 | D | + quant_dtype: sint4 +25-08-28 16:34:07 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:07 | D | + scale_dtype: (None,) +25-08-28 16:34:07 | D | - Quantizing single_transformer_blocks.17.attn.to_k.weight +25-08-28 16:34:07 | D | + quant_dtype: sint4 +25-08-28 16:34:07 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:07 | D | + scale_dtype: (None,) +25-08-28 16:34:07 | D | - Quantizing single_transformer_blocks.17.attn.to_v.weight +25-08-28 16:34:07 | D | + quant_dtype: sint4 +25-08-28 16:34:07 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:07 | D | + scale_dtype: (None,) +25-08-28 16:34:08 | D | - Quantizing single_transformer_blocks.17.proj_out.linears.0.weight +25-08-28 16:34:08 | D | + quant_dtype: sint4 +25-08-28 16:34:08 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:08 | D | + scale_dtype: (None,) +25-08-28 16:34:08 | D | - Quantizing single_transformer_blocks.17.proj_mlp.weight +25-08-28 16:34:08 | D | + quant_dtype: sint4 +25-08-28 16:34:08 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:08 | D | + scale_dtype: (None,) +25-08-28 16:34:08 | D | - Quantizing single_transformer_blocks.17.proj_out.linears.1.linear.weight +25-08-28 16:34:08 | D | + quant_dtype: sint4 +25-08-28 16:34:08 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:08 | D | + scale_dtype: (None,) +25-08-28 16:34:08 | D | - Quantizing weights: block single_transformer_blocks.18 +25-08-28 16:34:08 | D | - Quantizing single_transformer_blocks.18.norm.linear.weight +25-08-28 16:34:08 | D | + quant_dtype: sint4 +25-08-28 16:34:08 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:08 | D | + scale_dtype: (None,) +25-08-28 16:34:08 | D | - Quantizing single_transformer_blocks.18.attn.to_q.weight +25-08-28 16:34:08 | D | + quant_dtype: sint4 +25-08-28 16:34:08 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:08 | D | + scale_dtype: (None,) +25-08-28 16:34:09 | D | - Quantizing single_transformer_blocks.18.attn.to_k.weight +25-08-28 16:34:09 | D | + quant_dtype: sint4 +25-08-28 16:34:09 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:09 | D | + scale_dtype: (None,) +25-08-28 16:34:09 | D | - Quantizing single_transformer_blocks.18.attn.to_v.weight +25-08-28 16:34:09 | D | + quant_dtype: sint4 +25-08-28 16:34:09 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:09 | D | + scale_dtype: (None,) +25-08-28 16:34:09 | D | - Quantizing single_transformer_blocks.18.proj_out.linears.0.weight +25-08-28 16:34:09 | D | + quant_dtype: sint4 +25-08-28 16:34:09 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:09 | D | + scale_dtype: (None,) +25-08-28 16:34:09 | D | - Quantizing single_transformer_blocks.18.proj_mlp.weight +25-08-28 16:34:09 | D | + quant_dtype: sint4 +25-08-28 16:34:09 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:09 | D | + scale_dtype: (None,) +25-08-28 16:34:09 | D | - Quantizing single_transformer_blocks.18.proj_out.linears.1.linear.weight +25-08-28 16:34:09 | D | + quant_dtype: sint4 +25-08-28 16:34:09 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:09 | D | + scale_dtype: (None,) +25-08-28 16:34:09 | D | - Quantizing weights: block single_transformer_blocks.19 +25-08-28 16:34:09 | D | - Quantizing single_transformer_blocks.19.norm.linear.weight +25-08-28 16:34:09 | D | + quant_dtype: sint4 +25-08-28 16:34:09 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:09 | D | + scale_dtype: (None,) +25-08-28 16:34:10 | D | - Quantizing single_transformer_blocks.19.attn.to_q.weight +25-08-28 16:34:10 | D | + quant_dtype: sint4 +25-08-28 16:34:10 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:10 | D | + scale_dtype: (None,) +25-08-28 16:34:10 | D | - Quantizing single_transformer_blocks.19.attn.to_k.weight +25-08-28 16:34:10 | D | + quant_dtype: sint4 +25-08-28 16:34:10 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:10 | D | + scale_dtype: (None,) +25-08-28 16:34:10 | D | - Quantizing single_transformer_blocks.19.attn.to_v.weight +25-08-28 16:34:10 | D | + quant_dtype: sint4 +25-08-28 16:34:10 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:10 | D | + scale_dtype: (None,) +25-08-28 16:34:10 | D | - Quantizing single_transformer_blocks.19.proj_out.linears.0.weight +25-08-28 16:34:10 | D | + quant_dtype: sint4 +25-08-28 16:34:10 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:10 | D | + scale_dtype: (None,) +25-08-28 16:34:11 | D | - Quantizing single_transformer_blocks.19.proj_mlp.weight +25-08-28 16:34:11 | D | + quant_dtype: sint4 +25-08-28 16:34:11 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:11 | D | + scale_dtype: (None,) +25-08-28 16:34:11 | D | - Quantizing single_transformer_blocks.19.proj_out.linears.1.linear.weight +25-08-28 16:34:11 | D | + quant_dtype: sint4 +25-08-28 16:34:11 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:11 | D | + scale_dtype: (None,) +25-08-28 16:34:11 | D | - Quantizing weights: block single_transformer_blocks.20 +25-08-28 16:34:11 | D | - Quantizing single_transformer_blocks.20.norm.linear.weight +25-08-28 16:34:11 | D | + quant_dtype: sint4 +25-08-28 16:34:11 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:11 | D | + scale_dtype: (None,) +25-08-28 16:34:11 | D | - Quantizing single_transformer_blocks.20.attn.to_q.weight +25-08-28 16:34:11 | D | + quant_dtype: sint4 +25-08-28 16:34:11 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:11 | D | + scale_dtype: (None,) +25-08-28 16:34:11 | D | - Quantizing single_transformer_blocks.20.attn.to_k.weight +25-08-28 16:34:11 | D | + quant_dtype: sint4 +25-08-28 16:34:11 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:11 | D | + scale_dtype: (None,) +25-08-28 16:34:11 | D | - Quantizing single_transformer_blocks.20.attn.to_v.weight +25-08-28 16:34:11 | D | + quant_dtype: sint4 +25-08-28 16:34:11 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:11 | D | + scale_dtype: (None,) +25-08-28 16:34:12 | D | - Quantizing single_transformer_blocks.20.proj_out.linears.0.weight +25-08-28 16:34:12 | D | + quant_dtype: sint4 +25-08-28 16:34:12 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:12 | D | + scale_dtype: (None,) +25-08-28 16:34:12 | D | - Quantizing single_transformer_blocks.20.proj_mlp.weight +25-08-28 16:34:12 | D | + quant_dtype: sint4 +25-08-28 16:34:12 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:12 | D | + scale_dtype: (None,) +25-08-28 16:34:12 | D | - Quantizing single_transformer_blocks.20.proj_out.linears.1.linear.weight +25-08-28 16:34:12 | D | + quant_dtype: sint4 +25-08-28 16:34:12 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:12 | D | + scale_dtype: (None,) +25-08-28 16:34:12 | D | - Quantizing weights: block single_transformer_blocks.21 +25-08-28 16:34:12 | D | - Quantizing single_transformer_blocks.21.norm.linear.weight +25-08-28 16:34:12 | D | + quant_dtype: sint4 +25-08-28 16:34:12 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:12 | D | + scale_dtype: (None,) +25-08-28 16:34:12 | D | - Quantizing single_transformer_blocks.21.attn.to_q.weight +25-08-28 16:34:12 | D | + quant_dtype: sint4 +25-08-28 16:34:12 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:12 | D | + scale_dtype: (None,) +25-08-28 16:34:13 | D | - Quantizing single_transformer_blocks.21.attn.to_k.weight +25-08-28 16:34:13 | D | + quant_dtype: sint4 +25-08-28 16:34:13 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:13 | D | + scale_dtype: (None,) +25-08-28 16:34:13 | D | - Quantizing single_transformer_blocks.21.attn.to_v.weight +25-08-28 16:34:13 | D | + quant_dtype: sint4 +25-08-28 16:34:13 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:13 | D | + scale_dtype: (None,) +25-08-28 16:34:13 | D | - Quantizing single_transformer_blocks.21.proj_out.linears.0.weight +25-08-28 16:34:13 | D | + quant_dtype: sint4 +25-08-28 16:34:13 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:13 | D | + scale_dtype: (None,) +25-08-28 16:34:13 | D | - Quantizing single_transformer_blocks.21.proj_mlp.weight +25-08-28 16:34:13 | D | + quant_dtype: sint4 +25-08-28 16:34:13 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:13 | D | + scale_dtype: (None,) +25-08-28 16:34:13 | D | - Quantizing single_transformer_blocks.21.proj_out.linears.1.linear.weight +25-08-28 16:34:13 | D | + quant_dtype: sint4 +25-08-28 16:34:13 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:13 | D | + scale_dtype: (None,) +25-08-28 16:34:14 | D | - Quantizing weights: block single_transformer_blocks.22 +25-08-28 16:34:14 | D | - Quantizing single_transformer_blocks.22.norm.linear.weight +25-08-28 16:34:14 | D | + quant_dtype: sint4 +25-08-28 16:34:14 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:14 | D | + scale_dtype: (None,) +25-08-28 16:34:14 | D | - Quantizing single_transformer_blocks.22.attn.to_q.weight +25-08-28 16:34:14 | D | + quant_dtype: sint4 +25-08-28 16:34:14 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:14 | D | + scale_dtype: (None,) +25-08-28 16:34:14 | D | - Quantizing single_transformer_blocks.22.attn.to_k.weight +25-08-28 16:34:14 | D | + quant_dtype: sint4 +25-08-28 16:34:14 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:14 | D | + scale_dtype: (None,) +25-08-28 16:34:14 | D | - Quantizing single_transformer_blocks.22.attn.to_v.weight +25-08-28 16:34:14 | D | + quant_dtype: sint4 +25-08-28 16:34:14 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:14 | D | + scale_dtype: (None,) +25-08-28 16:34:14 | D | - Quantizing single_transformer_blocks.22.proj_out.linears.0.weight +25-08-28 16:34:14 | D | + quant_dtype: sint4 +25-08-28 16:34:14 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:14 | D | + scale_dtype: (None,) +25-08-28 16:34:14 | D | - Quantizing single_transformer_blocks.22.proj_mlp.weight +25-08-28 16:34:14 | D | + quant_dtype: sint4 +25-08-28 16:34:14 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:14 | D | + scale_dtype: (None,) +25-08-28 16:34:15 | D | - Quantizing single_transformer_blocks.22.proj_out.linears.1.linear.weight +25-08-28 16:34:15 | D | + quant_dtype: sint4 +25-08-28 16:34:15 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:15 | D | + scale_dtype: (None,) +25-08-28 16:34:15 | D | - Quantizing weights: block single_transformer_blocks.23 +25-08-28 16:34:15 | D | - Quantizing single_transformer_blocks.23.norm.linear.weight +25-08-28 16:34:15 | D | + quant_dtype: sint4 +25-08-28 16:34:15 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:15 | D | + scale_dtype: (None,) +25-08-28 16:34:15 | D | - Quantizing single_transformer_blocks.23.attn.to_q.weight +25-08-28 16:34:15 | D | + quant_dtype: sint4 +25-08-28 16:34:15 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:15 | D | + scale_dtype: (None,) +25-08-28 16:34:15 | D | - Quantizing single_transformer_blocks.23.attn.to_k.weight +25-08-28 16:34:15 | D | + quant_dtype: sint4 +25-08-28 16:34:15 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:15 | D | + scale_dtype: (None,) +25-08-28 16:34:15 | D | - Quantizing single_transformer_blocks.23.attn.to_v.weight +25-08-28 16:34:15 | D | + quant_dtype: sint4 +25-08-28 16:34:15 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:15 | D | + scale_dtype: (None,) +25-08-28 16:34:16 | D | - Quantizing single_transformer_blocks.23.proj_out.linears.0.weight +25-08-28 16:34:16 | D | + quant_dtype: sint4 +25-08-28 16:34:16 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:16 | D | + scale_dtype: (None,) +25-08-28 16:34:16 | D | - Quantizing single_transformer_blocks.23.proj_mlp.weight +25-08-28 16:34:16 | D | + quant_dtype: sint4 +25-08-28 16:34:16 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:16 | D | + scale_dtype: (None,) +25-08-28 16:34:16 | D | - Quantizing single_transformer_blocks.23.proj_out.linears.1.linear.weight +25-08-28 16:34:16 | D | + quant_dtype: sint4 +25-08-28 16:34:16 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:16 | D | + scale_dtype: (None,) +25-08-28 16:34:16 | D | - Quantizing weights: block single_transformer_blocks.24 +25-08-28 16:34:16 | D | - Quantizing single_transformer_blocks.24.norm.linear.weight +25-08-28 16:34:16 | D | + quant_dtype: sint4 +25-08-28 16:34:16 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:16 | D | + scale_dtype: (None,) +25-08-28 16:34:16 | D | - Quantizing single_transformer_blocks.24.attn.to_q.weight +25-08-28 16:34:16 | D | + quant_dtype: sint4 +25-08-28 16:34:16 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:16 | D | + scale_dtype: (None,) +25-08-28 16:34:17 | D | - Quantizing single_transformer_blocks.24.attn.to_k.weight +25-08-28 16:34:17 | D | + quant_dtype: sint4 +25-08-28 16:34:17 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:17 | D | + scale_dtype: (None,) +25-08-28 16:34:17 | D | - Quantizing single_transformer_blocks.24.attn.to_v.weight +25-08-28 16:34:17 | D | + quant_dtype: sint4 +25-08-28 16:34:17 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:17 | D | + scale_dtype: (None,) +25-08-28 16:34:17 | D | - Quantizing single_transformer_blocks.24.proj_out.linears.0.weight +25-08-28 16:34:17 | D | + quant_dtype: sint4 +25-08-28 16:34:17 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:17 | D | + scale_dtype: (None,) +25-08-28 16:34:17 | D | - Quantizing single_transformer_blocks.24.proj_mlp.weight +25-08-28 16:34:17 | D | + quant_dtype: sint4 +25-08-28 16:34:17 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:17 | D | + scale_dtype: (None,) +25-08-28 16:34:17 | D | - Quantizing single_transformer_blocks.24.proj_out.linears.1.linear.weight +25-08-28 16:34:17 | D | + quant_dtype: sint4 +25-08-28 16:34:17 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:17 | D | + scale_dtype: (None,) +25-08-28 16:34:18 | D | - Quantizing weights: block single_transformer_blocks.25 +25-08-28 16:34:18 | D | - Quantizing single_transformer_blocks.25.norm.linear.weight +25-08-28 16:34:18 | D | + quant_dtype: sint4 +25-08-28 16:34:18 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:18 | D | + scale_dtype: (None,) +25-08-28 16:34:18 | D | - Quantizing single_transformer_blocks.25.attn.to_q.weight +25-08-28 16:34:18 | D | + quant_dtype: sint4 +25-08-28 16:34:18 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:18 | D | + scale_dtype: (None,) +25-08-28 16:34:18 | D | - Quantizing single_transformer_blocks.25.attn.to_k.weight +25-08-28 16:34:18 | D | + quant_dtype: sint4 +25-08-28 16:34:18 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:18 | D | + scale_dtype: (None,) +25-08-28 16:34:18 | D | - Quantizing single_transformer_blocks.25.attn.to_v.weight +25-08-28 16:34:18 | D | + quant_dtype: sint4 +25-08-28 16:34:18 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:18 | D | + scale_dtype: (None,) +25-08-28 16:34:18 | D | - Quantizing single_transformer_blocks.25.proj_out.linears.0.weight +25-08-28 16:34:18 | D | + quant_dtype: sint4 +25-08-28 16:34:18 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:18 | D | + scale_dtype: (None,) +25-08-28 16:34:18 | D | - Quantizing single_transformer_blocks.25.proj_mlp.weight +25-08-28 16:34:18 | D | + quant_dtype: sint4 +25-08-28 16:34:18 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:18 | D | + scale_dtype: (None,) +25-08-28 16:34:19 | D | - Quantizing single_transformer_blocks.25.proj_out.linears.1.linear.weight +25-08-28 16:34:19 | D | + quant_dtype: sint4 +25-08-28 16:34:19 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:19 | D | + scale_dtype: (None,) +25-08-28 16:34:19 | D | - Quantizing weights: block single_transformer_blocks.26 +25-08-28 16:34:19 | D | - Quantizing single_transformer_blocks.26.norm.linear.weight +25-08-28 16:34:19 | D | + quant_dtype: sint4 +25-08-28 16:34:19 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:19 | D | + scale_dtype: (None,) +25-08-28 16:34:19 | D | - Quantizing single_transformer_blocks.26.attn.to_q.weight +25-08-28 16:34:19 | D | + quant_dtype: sint4 +25-08-28 16:34:19 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:19 | D | + scale_dtype: (None,) +25-08-28 16:34:19 | D | - Quantizing single_transformer_blocks.26.attn.to_k.weight +25-08-28 16:34:19 | D | + quant_dtype: sint4 +25-08-28 16:34:19 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:19 | D | + scale_dtype: (None,) +25-08-28 16:34:19 | D | - Quantizing single_transformer_blocks.26.attn.to_v.weight +25-08-28 16:34:19 | D | + quant_dtype: sint4 +25-08-28 16:34:19 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:19 | D | + scale_dtype: (None,) +25-08-28 16:34:20 | D | - Quantizing single_transformer_blocks.26.proj_out.linears.0.weight +25-08-28 16:34:20 | D | + quant_dtype: sint4 +25-08-28 16:34:20 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:20 | D | + scale_dtype: (None,) +25-08-28 16:34:20 | D | - Quantizing single_transformer_blocks.26.proj_mlp.weight +25-08-28 16:34:20 | D | + quant_dtype: sint4 +25-08-28 16:34:20 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:20 | D | + scale_dtype: (None,) +25-08-28 16:34:20 | D | - Quantizing single_transformer_blocks.26.proj_out.linears.1.linear.weight +25-08-28 16:34:20 | D | + quant_dtype: sint4 +25-08-28 16:34:20 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:20 | D | + scale_dtype: (None,) +25-08-28 16:34:20 | D | - Quantizing weights: block single_transformer_blocks.27 +25-08-28 16:34:20 | D | - Quantizing single_transformer_blocks.27.norm.linear.weight +25-08-28 16:34:20 | D | + quant_dtype: sint4 +25-08-28 16:34:20 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:20 | D | + scale_dtype: (None,) +25-08-28 16:34:20 | D | - Quantizing single_transformer_blocks.27.attn.to_q.weight +25-08-28 16:34:20 | D | + quant_dtype: sint4 +25-08-28 16:34:20 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:20 | D | + scale_dtype: (None,) +25-08-28 16:34:21 | D | - Quantizing single_transformer_blocks.27.attn.to_k.weight +25-08-28 16:34:21 | D | + quant_dtype: sint4 +25-08-28 16:34:21 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:21 | D | + scale_dtype: (None,) +25-08-28 16:34:21 | D | - Quantizing single_transformer_blocks.27.attn.to_v.weight +25-08-28 16:34:21 | D | + quant_dtype: sint4 +25-08-28 16:34:21 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:21 | D | + scale_dtype: (None,) +25-08-28 16:34:21 | D | - Quantizing single_transformer_blocks.27.proj_out.linears.0.weight +25-08-28 16:34:21 | D | + quant_dtype: sint4 +25-08-28 16:34:21 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:21 | D | + scale_dtype: (None,) +25-08-28 16:34:21 | D | - Quantizing single_transformer_blocks.27.proj_mlp.weight +25-08-28 16:34:21 | D | + quant_dtype: sint4 +25-08-28 16:34:21 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:21 | D | + scale_dtype: (None,) +25-08-28 16:34:21 | D | - Quantizing single_transformer_blocks.27.proj_out.linears.1.linear.weight +25-08-28 16:34:21 | D | + quant_dtype: sint4 +25-08-28 16:34:21 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:21 | D | + scale_dtype: (None,) +25-08-28 16:34:22 | D | - Quantizing weights: block single_transformer_blocks.28 +25-08-28 16:34:22 | D | - Quantizing single_transformer_blocks.28.norm.linear.weight +25-08-28 16:34:22 | D | + quant_dtype: sint4 +25-08-28 16:34:22 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:22 | D | + scale_dtype: (None,) +25-08-28 16:34:22 | D | - Quantizing single_transformer_blocks.28.attn.to_q.weight +25-08-28 16:34:22 | D | + quant_dtype: sint4 +25-08-28 16:34:22 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:22 | D | + scale_dtype: (None,) +25-08-28 16:34:22 | D | - Quantizing single_transformer_blocks.28.attn.to_k.weight +25-08-28 16:34:22 | D | + quant_dtype: sint4 +25-08-28 16:34:22 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:22 | D | + scale_dtype: (None,) +25-08-28 16:34:22 | D | - Quantizing single_transformer_blocks.28.attn.to_v.weight +25-08-28 16:34:22 | D | + quant_dtype: sint4 +25-08-28 16:34:22 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:22 | D | + scale_dtype: (None,) +25-08-28 16:34:22 | D | - Quantizing single_transformer_blocks.28.proj_out.linears.0.weight +25-08-28 16:34:22 | D | + quant_dtype: sint4 +25-08-28 16:34:22 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:22 | D | + scale_dtype: (None,) +25-08-28 16:34:23 | D | - Quantizing single_transformer_blocks.28.proj_mlp.weight +25-08-28 16:34:23 | D | + quant_dtype: sint4 +25-08-28 16:34:23 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:23 | D | + scale_dtype: (None,) +25-08-28 16:34:23 | D | - Quantizing single_transformer_blocks.28.proj_out.linears.1.linear.weight +25-08-28 16:34:23 | D | + quant_dtype: sint4 +25-08-28 16:34:23 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:23 | D | + scale_dtype: (None,) +25-08-28 16:34:23 | D | - Quantizing weights: block single_transformer_blocks.29 +25-08-28 16:34:23 | D | - Quantizing single_transformer_blocks.29.norm.linear.weight +25-08-28 16:34:23 | D | + quant_dtype: sint4 +25-08-28 16:34:23 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:23 | D | + scale_dtype: (None,) +25-08-28 16:34:23 | D | - Quantizing single_transformer_blocks.29.attn.to_q.weight +25-08-28 16:34:23 | D | + quant_dtype: sint4 +25-08-28 16:34:23 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:23 | D | + scale_dtype: (None,) +25-08-28 16:34:23 | D | - Quantizing single_transformer_blocks.29.attn.to_k.weight +25-08-28 16:34:23 | D | + quant_dtype: sint4 +25-08-28 16:34:23 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:23 | D | + scale_dtype: (None,) +25-08-28 16:34:23 | D | - Quantizing single_transformer_blocks.29.attn.to_v.weight +25-08-28 16:34:23 | D | + quant_dtype: sint4 +25-08-28 16:34:23 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:23 | D | + scale_dtype: (None,) +25-08-28 16:34:24 | D | - Quantizing single_transformer_blocks.29.proj_out.linears.0.weight +25-08-28 16:34:24 | D | + quant_dtype: sint4 +25-08-28 16:34:24 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:24 | D | + scale_dtype: (None,) +25-08-28 16:34:24 | D | - Quantizing single_transformer_blocks.29.proj_mlp.weight +25-08-28 16:34:24 | D | + quant_dtype: sint4 +25-08-28 16:34:24 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:24 | D | + scale_dtype: (None,) +25-08-28 16:34:24 | D | - Quantizing single_transformer_blocks.29.proj_out.linears.1.linear.weight +25-08-28 16:34:24 | D | + quant_dtype: sint4 +25-08-28 16:34:24 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:24 | D | + scale_dtype: (None,) +25-08-28 16:34:24 | D | - Quantizing weights: block single_transformer_blocks.30 +25-08-28 16:34:24 | D | - Quantizing single_transformer_blocks.30.norm.linear.weight +25-08-28 16:34:24 | D | + quant_dtype: sint4 +25-08-28 16:34:24 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:24 | D | + scale_dtype: (None,) +25-08-28 16:34:24 | D | - Quantizing single_transformer_blocks.30.attn.to_q.weight +25-08-28 16:34:24 | D | + quant_dtype: sint4 +25-08-28 16:34:24 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:24 | D | + scale_dtype: (None,) +25-08-28 16:34:25 | D | - Quantizing single_transformer_blocks.30.attn.to_k.weight +25-08-28 16:34:25 | D | + quant_dtype: sint4 +25-08-28 16:34:25 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:25 | D | + scale_dtype: (None,) +25-08-28 16:34:25 | D | - Quantizing single_transformer_blocks.30.attn.to_v.weight +25-08-28 16:34:25 | D | + quant_dtype: sint4 +25-08-28 16:34:25 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:25 | D | + scale_dtype: (None,) +25-08-28 16:34:25 | D | - Quantizing single_transformer_blocks.30.proj_out.linears.0.weight +25-08-28 16:34:25 | D | + quant_dtype: sint4 +25-08-28 16:34:25 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:25 | D | + scale_dtype: (None,) +25-08-28 16:34:25 | D | - Quantizing single_transformer_blocks.30.proj_mlp.weight +25-08-28 16:34:25 | D | + quant_dtype: sint4 +25-08-28 16:34:25 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:25 | D | + scale_dtype: (None,) +25-08-28 16:34:25 | D | - Quantizing single_transformer_blocks.30.proj_out.linears.1.linear.weight +25-08-28 16:34:25 | D | + quant_dtype: sint4 +25-08-28 16:34:25 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:25 | D | + scale_dtype: (None,) +25-08-28 16:34:26 | D | - Quantizing weights: block single_transformer_blocks.31 +25-08-28 16:34:26 | D | - Quantizing single_transformer_blocks.31.norm.linear.weight +25-08-28 16:34:26 | D | + quant_dtype: sint4 +25-08-28 16:34:26 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:26 | D | + scale_dtype: (None,) +25-08-28 16:34:26 | D | - Quantizing single_transformer_blocks.31.attn.to_q.weight +25-08-28 16:34:26 | D | + quant_dtype: sint4 +25-08-28 16:34:26 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:26 | D | + scale_dtype: (None,) +25-08-28 16:34:26 | D | - Quantizing single_transformer_blocks.31.attn.to_k.weight +25-08-28 16:34:26 | D | + quant_dtype: sint4 +25-08-28 16:34:26 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:26 | D | + scale_dtype: (None,) +25-08-28 16:34:26 | D | - Quantizing single_transformer_blocks.31.attn.to_v.weight +25-08-28 16:34:26 | D | + quant_dtype: sint4 +25-08-28 16:34:26 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:26 | D | + scale_dtype: (None,) +25-08-28 16:34:26 | D | - Quantizing single_transformer_blocks.31.proj_out.linears.0.weight +25-08-28 16:34:26 | D | + quant_dtype: sint4 +25-08-28 16:34:26 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:26 | D | + scale_dtype: (None,) +25-08-28 16:34:26 | D | - Quantizing single_transformer_blocks.31.proj_mlp.weight +25-08-28 16:34:26 | D | + quant_dtype: sint4 +25-08-28 16:34:26 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:26 | D | + scale_dtype: (None,) +25-08-28 16:34:27 | D | - Quantizing single_transformer_blocks.31.proj_out.linears.1.linear.weight +25-08-28 16:34:27 | D | + quant_dtype: sint4 +25-08-28 16:34:27 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:27 | D | + scale_dtype: (None,) +25-08-28 16:34:27 | D | - Quantizing weights: block single_transformer_blocks.32 +25-08-28 16:34:27 | D | - Quantizing single_transformer_blocks.32.norm.linear.weight +25-08-28 16:34:27 | D | + quant_dtype: sint4 +25-08-28 16:34:27 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:27 | D | + scale_dtype: (None,) +25-08-28 16:34:27 | D | - Quantizing single_transformer_blocks.32.attn.to_q.weight +25-08-28 16:34:27 | D | + quant_dtype: sint4 +25-08-28 16:34:27 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:27 | D | + scale_dtype: (None,) +25-08-28 16:34:27 | D | - Quantizing single_transformer_blocks.32.attn.to_k.weight +25-08-28 16:34:27 | D | + quant_dtype: sint4 +25-08-28 16:34:27 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:27 | D | + scale_dtype: (None,) +25-08-28 16:34:27 | D | - Quantizing single_transformer_blocks.32.attn.to_v.weight +25-08-28 16:34:27 | D | + quant_dtype: sint4 +25-08-28 16:34:27 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:27 | D | + scale_dtype: (None,) +25-08-28 16:34:28 | D | - Quantizing single_transformer_blocks.32.proj_out.linears.0.weight +25-08-28 16:34:28 | D | + quant_dtype: sint4 +25-08-28 16:34:28 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:28 | D | + scale_dtype: (None,) +25-08-28 16:34:28 | D | - Quantizing single_transformer_blocks.32.proj_mlp.weight +25-08-28 16:34:28 | D | + quant_dtype: sint4 +25-08-28 16:34:28 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:28 | D | + scale_dtype: (None,) +25-08-28 16:34:28 | D | - Quantizing single_transformer_blocks.32.proj_out.linears.1.linear.weight +25-08-28 16:34:28 | D | + quant_dtype: sint4 +25-08-28 16:34:28 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:28 | D | + scale_dtype: (None,) +25-08-28 16:34:28 | D | - Quantizing weights: block single_transformer_blocks.33 +25-08-28 16:34:28 | D | - Quantizing single_transformer_blocks.33.norm.linear.weight +25-08-28 16:34:28 | D | + quant_dtype: sint4 +25-08-28 16:34:28 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:28 | D | + scale_dtype: (None,) +25-08-28 16:34:28 | D | - Quantizing single_transformer_blocks.33.attn.to_q.weight +25-08-28 16:34:28 | D | + quant_dtype: sint4 +25-08-28 16:34:28 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:28 | D | + scale_dtype: (None,) +25-08-28 16:34:29 | D | - Quantizing single_transformer_blocks.33.attn.to_k.weight +25-08-28 16:34:29 | D | + quant_dtype: sint4 +25-08-28 16:34:29 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:29 | D | + scale_dtype: (None,) +25-08-28 16:34:29 | D | - Quantizing single_transformer_blocks.33.attn.to_v.weight +25-08-28 16:34:29 | D | + quant_dtype: sint4 +25-08-28 16:34:29 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:29 | D | + scale_dtype: (None,) +25-08-28 16:34:29 | D | - Quantizing single_transformer_blocks.33.proj_out.linears.0.weight +25-08-28 16:34:29 | D | + quant_dtype: sint4 +25-08-28 16:34:29 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:29 | D | + scale_dtype: (None,) +25-08-28 16:34:29 | D | - Quantizing single_transformer_blocks.33.proj_mlp.weight +25-08-28 16:34:29 | D | + quant_dtype: sint4 +25-08-28 16:34:29 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:29 | D | + scale_dtype: (None,) +25-08-28 16:34:29 | D | - Quantizing single_transformer_blocks.33.proj_out.linears.1.linear.weight +25-08-28 16:34:29 | D | + quant_dtype: sint4 +25-08-28 16:34:29 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:29 | D | + scale_dtype: (None,) +25-08-28 16:34:30 | D | - Quantizing weights: block single_transformer_blocks.34 +25-08-28 16:34:30 | D | - Quantizing single_transformer_blocks.34.norm.linear.weight +25-08-28 16:34:30 | D | + quant_dtype: sint4 +25-08-28 16:34:30 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:30 | D | + scale_dtype: (None,) +25-08-28 16:34:30 | D | - Quantizing single_transformer_blocks.34.attn.to_q.weight +25-08-28 16:34:30 | D | + quant_dtype: sint4 +25-08-28 16:34:30 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:30 | D | + scale_dtype: (None,) +25-08-28 16:34:30 | D | - Quantizing single_transformer_blocks.34.attn.to_k.weight +25-08-28 16:34:30 | D | + quant_dtype: sint4 +25-08-28 16:34:30 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:30 | D | + scale_dtype: (None,) +25-08-28 16:34:30 | D | - Quantizing single_transformer_blocks.34.attn.to_v.weight +25-08-28 16:34:30 | D | + quant_dtype: sint4 +25-08-28 16:34:30 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:30 | D | + scale_dtype: (None,) +25-08-28 16:34:30 | D | - Quantizing single_transformer_blocks.34.proj_out.linears.0.weight +25-08-28 16:34:30 | D | + quant_dtype: sint4 +25-08-28 16:34:30 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:30 | D | + scale_dtype: (None,) +25-08-28 16:34:30 | D | - Quantizing single_transformer_blocks.34.proj_mlp.weight +25-08-28 16:34:30 | D | + quant_dtype: sint4 +25-08-28 16:34:30 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:30 | D | + scale_dtype: (None,) +25-08-28 16:34:31 | D | - Quantizing single_transformer_blocks.34.proj_out.linears.1.linear.weight +25-08-28 16:34:31 | D | + quant_dtype: sint4 +25-08-28 16:34:31 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:31 | D | + scale_dtype: (None,) +25-08-28 16:34:31 | D | - Quantizing weights: block single_transformer_blocks.35 +25-08-28 16:34:31 | D | - Quantizing single_transformer_blocks.35.norm.linear.weight +25-08-28 16:34:31 | D | + quant_dtype: sint4 +25-08-28 16:34:31 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:31 | D | + scale_dtype: (None,) +25-08-28 16:34:31 | D | - Quantizing single_transformer_blocks.35.attn.to_q.weight +25-08-28 16:34:31 | D | + quant_dtype: sint4 +25-08-28 16:34:31 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:31 | D | + scale_dtype: (None,) +25-08-28 16:34:31 | D | - Quantizing single_transformer_blocks.35.attn.to_k.weight +25-08-28 16:34:31 | D | + quant_dtype: sint4 +25-08-28 16:34:31 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:31 | D | + scale_dtype: (None,) +25-08-28 16:34:31 | D | - Quantizing single_transformer_blocks.35.attn.to_v.weight +25-08-28 16:34:31 | D | + quant_dtype: sint4 +25-08-28 16:34:31 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:31 | D | + scale_dtype: (None,) +25-08-28 16:34:32 | D | - Quantizing single_transformer_blocks.35.proj_out.linears.0.weight +25-08-28 16:34:32 | D | + quant_dtype: sint4 +25-08-28 16:34:32 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:32 | D | + scale_dtype: (None,) +25-08-28 16:34:32 | D | - Quantizing single_transformer_blocks.35.proj_mlp.weight +25-08-28 16:34:32 | D | + quant_dtype: sint4 +25-08-28 16:34:32 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:32 | D | + scale_dtype: (None,) +25-08-28 16:34:32 | D | - Quantizing single_transformer_blocks.35.proj_out.linears.1.linear.weight +25-08-28 16:34:32 | D | + quant_dtype: sint4 +25-08-28 16:34:32 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:32 | D | + scale_dtype: (None,) +25-08-28 16:34:32 | D | - Quantizing weights: block single_transformer_blocks.36 +25-08-28 16:34:32 | D | - Quantizing single_transformer_blocks.36.norm.linear.weight +25-08-28 16:34:32 | D | + quant_dtype: sint4 +25-08-28 16:34:32 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:32 | D | + scale_dtype: (None,) +25-08-28 16:34:32 | D | - Quantizing single_transformer_blocks.36.attn.to_q.weight +25-08-28 16:34:32 | D | + quant_dtype: sint4 +25-08-28 16:34:32 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:32 | D | + scale_dtype: (None,) +25-08-28 16:34:33 | D | - Quantizing single_transformer_blocks.36.attn.to_k.weight +25-08-28 16:34:33 | D | + quant_dtype: sint4 +25-08-28 16:34:33 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:33 | D | + scale_dtype: (None,) +25-08-28 16:34:33 | D | - Quantizing single_transformer_blocks.36.attn.to_v.weight +25-08-28 16:34:33 | D | + quant_dtype: sint4 +25-08-28 16:34:33 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:33 | D | + scale_dtype: (None,) +25-08-28 16:34:33 | D | - Quantizing single_transformer_blocks.36.proj_out.linears.0.weight +25-08-28 16:34:33 | D | + quant_dtype: sint4 +25-08-28 16:34:33 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:33 | D | + scale_dtype: (None,) +25-08-28 16:34:33 | D | - Quantizing single_transformer_blocks.36.proj_mlp.weight +25-08-28 16:34:33 | D | + quant_dtype: sint4 +25-08-28 16:34:33 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:33 | D | + scale_dtype: (None,) +25-08-28 16:34:33 | D | - Quantizing single_transformer_blocks.36.proj_out.linears.1.linear.weight +25-08-28 16:34:33 | D | + quant_dtype: sint4 +25-08-28 16:34:33 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:33 | D | + scale_dtype: (None,) +25-08-28 16:34:33 | D | - Quantizing weights: block single_transformer_blocks.37 +25-08-28 16:34:33 | D | - Quantizing single_transformer_blocks.37.norm.linear.weight +25-08-28 16:34:33 | D | + quant_dtype: sint4 +25-08-28 16:34:33 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:33 | D | + scale_dtype: (None,) +25-08-28 16:34:34 | D | - Quantizing single_transformer_blocks.37.attn.to_q.weight +25-08-28 16:34:34 | D | + quant_dtype: sint4 +25-08-28 16:34:34 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:34 | D | + scale_dtype: (None,) +25-08-28 16:34:34 | D | - Quantizing single_transformer_blocks.37.attn.to_k.weight +25-08-28 16:34:34 | D | + quant_dtype: sint4 +25-08-28 16:34:34 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:34 | D | + scale_dtype: (None,) +25-08-28 16:34:34 | D | - Quantizing single_transformer_blocks.37.attn.to_v.weight +25-08-28 16:34:34 | D | + quant_dtype: sint4 +25-08-28 16:34:34 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:34 | D | + scale_dtype: (None,) +25-08-28 16:34:34 | D | - Quantizing single_transformer_blocks.37.proj_out.linears.0.weight +25-08-28 16:34:34 | D | + quant_dtype: sint4 +25-08-28 16:34:34 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:34 | D | + scale_dtype: (None,) +25-08-28 16:34:34 | D | - Quantizing single_transformer_blocks.37.proj_mlp.weight +25-08-28 16:34:34 | D | + quant_dtype: sint4 +25-08-28 16:34:34 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:34 | D | + scale_dtype: (None,) +25-08-28 16:34:35 | D | - Quantizing single_transformer_blocks.37.proj_out.linears.1.linear.weight +25-08-28 16:34:35 | D | + quant_dtype: sint4 +25-08-28 16:34:35 | D | + group_shape: ((1, 64, 1, 1, 1),) +25-08-28 16:34:35 | D | + scale_dtype: (None,) +25-08-28 16:34:35 | I | - Saving branch settings to runs/diffusion/cache/quant/qdiff.128/branch/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/w.skip.[e+rs+rtp+s+tpi+tpo]-x.skip.[e+rs+rtp+s+tan+tn+tpi+tpo]-y.skip.[]/shift/lowrank.r32/smooth.proj.OutputsError.GridSearch.Layer.d2.en1.sn1/smooth.proj.[a.AbsMax.b.AbsMax]/smooth.proj.g20.bn2.lr.nf/smooth.proj.skip.[rc+tan+tn]/lowrank.OutputsError.Manual.Layer.d2.en1.sn1/lowrank.i100.r32.earlystop/lowrank.skip.[rc+tan+tn]/flux.1-dev-ghibli.pt +25-08-28 16:34:35 | I | - Saving weight settings to runs/diffusion/flux.1/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000.RUNNING/run-250828.070127.RUNNING/model/wgts.pt +25-08-28 16:34:35 | I | - Linking branch settings to runs/diffusion/flux.1/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000.RUNNING/run-250828.070127.RUNNING/model/branch.pt +25-08-28 16:34:35 | I | - Saving model to runs/diffusion/flux.1/flux.1-dev-ghibli/w.4-x.4-y.16/w.sint4-x.sint4.u-y.bf16/w.v64.bf16-x.v64.bf16-y.tnsr.bf16/smooth.proj-w.static.lowrank/shift-skip.x.[[w]+tan+tn].w.[e+rs+rtp+s+tpi+tpo]-low.r32.i100.e.skip.[rc+tan+tn]-smth.proj.GridSearch.bn2.[AbsMax].lr.skip.[rc+tan+tn]-qdiff.128-t50.g3.5-s5000.RUNNING/run-250828.070127.RUNNING/model +25-08-28 16:35:03 | I | * Quantizing activations +25-08-28 16:35:03 | I | - No need to generate/load activation quantizer settings +25-08-28 16:35:03 | D | - Quantizing layer transformer_blocks.0 +25-08-28 16:35:03 | D | - Calibrating transformer_blocks.0.attn.to_q.input, transformer_blocks.0.attn.to_k.input, transformer_blocks.0.attn.to_v.input +25-08-28 16:35:03 | D | - Calibrating transformer_blocks.0.attn.add_q_proj.input, transformer_blocks.0.attn.add_k_proj.input, transformer_blocks.0.attn.add_v_proj.input +25-08-28 16:35:03 | D | - Calibrating transformer_blocks.0.attn.to_out.0.input +25-08-28 16:35:04 | D | - Calibrating transformer_blocks.0.attn.to_add_out.input +25-08-28 16:35:04 | D | - Calibrating transformer_blocks.0.ff.net.0.proj.input +25-08-28 16:35:04 | D | - Calibrating transformer_blocks.0.ff.net.2.linear.input +25-08-28 16:35:04 | D | - Calibrating transformer_blocks.0.ff_context.net.0.proj.input +25-08-28 16:35:04 | D | - Calibrating transformer_blocks.0.ff_context.net.2.linear.input +25-08-28 16:35:04 | D | - Quantizing layer transformer_blocks.1 +25-08-28 16:35:04 | D | - Calibrating transformer_blocks.1.attn.to_q.input, transformer_blocks.1.attn.to_k.input, transformer_blocks.1.attn.to_v.input +25-08-28 16:35:05 | D | - Calibrating transformer_blocks.1.attn.add_q_proj.input, transformer_blocks.1.attn.add_k_proj.input, transformer_blocks.1.attn.add_v_proj.input +25-08-28 16:35:05 | D | - Calibrating transformer_blocks.1.attn.to_out.0.input +25-08-28 16:35:05 | D | - Calibrating transformer_blocks.1.attn.to_add_out.input +25-08-28 16:35:05 | D | - Calibrating transformer_blocks.1.ff.net.0.proj.input +25-08-28 16:35:05 | D | - Calibrating transformer_blocks.1.ff.net.2.linear.input +25-08-28 16:35:06 | D | - Calibrating transformer_blocks.1.ff_context.net.0.proj.input +25-08-28 16:35:06 | D | - Calibrating transformer_blocks.1.ff_context.net.2.linear.input +25-08-28 16:35:06 | D | - Quantizing layer transformer_blocks.2 +25-08-28 16:35:06 | D | - Calibrating transformer_blocks.2.attn.to_q.input, transformer_blocks.2.attn.to_k.input, transformer_blocks.2.attn.to_v.input +25-08-28 16:35:06 | D | - Calibrating transformer_blocks.2.attn.add_q_proj.input, transformer_blocks.2.attn.add_k_proj.input, transformer_blocks.2.attn.add_v_proj.input +25-08-28 16:35:06 | D | - Calibrating transformer_blocks.2.attn.to_out.0.input +25-08-28 16:35:07 | D | - Calibrating transformer_blocks.2.attn.to_add_out.input +25-08-28 16:35:07 | D | - Calibrating transformer_blocks.2.ff.net.0.proj.input +25-08-28 16:35:07 | D | - Calibrating transformer_blocks.2.ff.net.2.linear.input +25-08-28 16:35:07 | D | - Calibrating transformer_blocks.2.ff_context.net.0.proj.input +25-08-28 16:35:07 | D | - Calibrating transformer_blocks.2.ff_context.net.2.linear.input +25-08-28 16:35:07 | D | - Quantizing layer transformer_blocks.3 +25-08-28 16:35:07 | D | - Calibrating transformer_blocks.3.attn.to_q.input, transformer_blocks.3.attn.to_k.input, transformer_blocks.3.attn.to_v.input +25-08-28 16:35:08 | D | - Calibrating transformer_blocks.3.attn.add_q_proj.input, transformer_blocks.3.attn.add_k_proj.input, transformer_blocks.3.attn.add_v_proj.input +25-08-28 16:35:08 | D | - Calibrating transformer_blocks.3.attn.to_out.0.input +25-08-28 16:35:08 | D | - Calibrating transformer_blocks.3.attn.to_add_out.input +25-08-28 16:35:08 | D | - Calibrating transformer_blocks.3.ff.net.0.proj.input +25-08-28 16:35:08 | D | - Calibrating transformer_blocks.3.ff.net.2.linear.input +25-08-28 16:35:09 | D | - Calibrating transformer_blocks.3.ff_context.net.0.proj.input +25-08-28 16:35:09 | D | - Calibrating transformer_blocks.3.ff_context.net.2.linear.input +25-08-28 16:35:09 | D | - Quantizing layer transformer_blocks.4 +25-08-28 16:35:09 | D | - Calibrating transformer_blocks.4.attn.to_q.input, transformer_blocks.4.attn.to_k.input, transformer_blocks.4.attn.to_v.input +25-08-28 16:35:09 | D | - Calibrating transformer_blocks.4.attn.add_q_proj.input, transformer_blocks.4.attn.add_k_proj.input, transformer_blocks.4.attn.add_v_proj.input +25-08-28 16:35:09 | D | - Calibrating transformer_blocks.4.attn.to_out.0.input +25-08-28 16:35:09 | D | - Calibrating transformer_blocks.4.attn.to_add_out.input +25-08-28 16:35:10 | D | - Calibrating transformer_blocks.4.ff.net.0.proj.input +25-08-28 16:35:10 | D | - Calibrating transformer_blocks.4.ff.net.2.linear.input +25-08-28 16:35:10 | D | - Calibrating transformer_blocks.4.ff_context.net.0.proj.input +25-08-28 16:35:10 | D | - Calibrating transformer_blocks.4.ff_context.net.2.linear.input +25-08-28 16:35:10 | D | - Quantizing layer transformer_blocks.5 +25-08-28 16:35:10 | D | - Calibrating transformer_blocks.5.attn.to_q.input, transformer_blocks.5.attn.to_k.input, transformer_blocks.5.attn.to_v.input +25-08-28 16:35:11 | D | - Calibrating transformer_blocks.5.attn.add_q_proj.input, transformer_blocks.5.attn.add_k_proj.input, transformer_blocks.5.attn.add_v_proj.input +25-08-28 16:35:11 | D | - Calibrating transformer_blocks.5.attn.to_out.0.input +25-08-28 16:35:11 | D | - Calibrating transformer_blocks.5.attn.to_add_out.input +25-08-28 16:35:11 | D | - Calibrating transformer_blocks.5.ff.net.0.proj.input +25-08-28 16:35:11 | D | - Calibrating transformer_blocks.5.ff.net.2.linear.input +25-08-28 16:35:12 | D | - Calibrating transformer_blocks.5.ff_context.net.0.proj.input +25-08-28 16:35:12 | D | - Calibrating transformer_blocks.5.ff_context.net.2.linear.input +25-08-28 16:35:12 | D | - Quantizing layer transformer_blocks.6 +25-08-28 16:35:12 | D | - Calibrating transformer_blocks.6.attn.to_q.input, transformer_blocks.6.attn.to_k.input, transformer_blocks.6.attn.to_v.input +25-08-28 16:35:12 | D | - Calibrating transformer_blocks.6.attn.add_q_proj.input, transformer_blocks.6.attn.add_k_proj.input, transformer_blocks.6.attn.add_v_proj.input +25-08-28 16:35:12 | D | - Calibrating transformer_blocks.6.attn.to_out.0.input +25-08-28 16:35:12 | D | - Calibrating transformer_blocks.6.attn.to_add_out.input +25-08-28 16:35:13 | D | - Calibrating transformer_blocks.6.ff.net.0.proj.input +25-08-28 16:35:13 | D | - Calibrating transformer_blocks.6.ff.net.2.linear.input +25-08-28 16:35:13 | D | - Calibrating transformer_blocks.6.ff_context.net.0.proj.input +25-08-28 16:35:13 | D | - Calibrating transformer_blocks.6.ff_context.net.2.linear.input +25-08-28 16:35:13 | D | - Quantizing layer transformer_blocks.7 +25-08-28 16:35:13 | D | - Calibrating transformer_blocks.7.attn.to_q.input, transformer_blocks.7.attn.to_k.input, transformer_blocks.7.attn.to_v.input +25-08-28 16:35:14 | D | - Calibrating transformer_blocks.7.attn.add_q_proj.input, transformer_blocks.7.attn.add_k_proj.input, transformer_blocks.7.attn.add_v_proj.input +25-08-28 16:35:14 | D | - Calibrating transformer_blocks.7.attn.to_out.0.input +25-08-28 16:35:14 | D | - Calibrating transformer_blocks.7.attn.to_add_out.input +25-08-28 16:35:14 | D | - Calibrating transformer_blocks.7.ff.net.0.proj.input +25-08-28 16:35:14 | D | - Calibrating transformer_blocks.7.ff.net.2.linear.input +25-08-28 16:35:14 | D | - Calibrating transformer_blocks.7.ff_context.net.0.proj.input +25-08-28 16:35:15 | D | - Calibrating transformer_blocks.7.ff_context.net.2.linear.input +25-08-28 16:35:15 | D | - Quantizing layer transformer_blocks.8 +25-08-28 16:35:15 | D | - Calibrating transformer_blocks.8.attn.to_q.input, transformer_blocks.8.attn.to_k.input, transformer_blocks.8.attn.to_v.input +25-08-28 16:35:15 | D | - Calibrating transformer_blocks.8.attn.add_q_proj.input, transformer_blocks.8.attn.add_k_proj.input, transformer_blocks.8.attn.add_v_proj.input +25-08-28 16:35:15 | D | - Calibrating transformer_blocks.8.attn.to_out.0.input +25-08-28 16:35:15 | D | - Calibrating transformer_blocks.8.attn.to_add_out.input +25-08-28 16:35:16 | D | - Calibrating transformer_blocks.8.ff.net.0.proj.input +25-08-28 16:35:16 | D | - Calibrating transformer_blocks.8.ff.net.2.linear.input +25-08-28 16:35:16 | D | - Calibrating transformer_blocks.8.ff_context.net.0.proj.input +25-08-28 16:35:16 | D | - Calibrating transformer_blocks.8.ff_context.net.2.linear.input +25-08-28 16:35:16 | D | - Quantizing layer transformer_blocks.9 +25-08-28 16:35:16 | D | - Calibrating transformer_blocks.9.attn.to_q.input, transformer_blocks.9.attn.to_k.input, transformer_blocks.9.attn.to_v.input +25-08-28 16:35:16 | D | - Calibrating transformer_blocks.9.attn.add_q_proj.input, transformer_blocks.9.attn.add_k_proj.input, transformer_blocks.9.attn.add_v_proj.input +25-08-28 16:35:17 | D | - Calibrating transformer_blocks.9.attn.to_out.0.input +25-08-28 16:35:17 | D | - Calibrating transformer_blocks.9.attn.to_add_out.input +25-08-28 16:35:17 | D | - Calibrating transformer_blocks.9.ff.net.0.proj.input +25-08-28 16:35:17 | D | - Calibrating transformer_blocks.9.ff.net.2.linear.input +25-08-28 16:35:17 | D | - Calibrating transformer_blocks.9.ff_context.net.0.proj.input +25-08-28 16:35:18 | D | - Calibrating transformer_blocks.9.ff_context.net.2.linear.input +25-08-28 16:35:18 | D | - Quantizing layer transformer_blocks.10 +25-08-28 16:35:18 | D | - Calibrating transformer_blocks.10.attn.to_q.input, transformer_blocks.10.attn.to_k.input, transformer_blocks.10.attn.to_v.input +25-08-28 16:35:18 | D | - Calibrating transformer_blocks.10.attn.add_q_proj.input, transformer_blocks.10.attn.add_k_proj.input, transformer_blocks.10.attn.add_v_proj.input +25-08-28 16:35:18 | D | - Calibrating transformer_blocks.10.attn.to_out.0.input +25-08-28 16:35:18 | D | - Calibrating transformer_blocks.10.attn.to_add_out.input +25-08-28 16:35:19 | D | - Calibrating transformer_blocks.10.ff.net.0.proj.input +25-08-28 16:35:19 | D | - Calibrating transformer_blocks.10.ff.net.2.linear.input +25-08-28 16:35:19 | D | - Calibrating transformer_blocks.10.ff_context.net.0.proj.input +25-08-28 16:35:19 | D | - Calibrating transformer_blocks.10.ff_context.net.2.linear.input +25-08-28 16:35:19 | D | - Quantizing layer transformer_blocks.11 +25-08-28 16:35:19 | D | - Calibrating transformer_blocks.11.attn.to_q.input, transformer_blocks.11.attn.to_k.input, transformer_blocks.11.attn.to_v.input +25-08-28 16:35:19 | D | - Calibrating transformer_blocks.11.attn.add_q_proj.input, transformer_blocks.11.attn.add_k_proj.input, transformer_blocks.11.attn.add_v_proj.input +25-08-28 16:35:20 | D | - Calibrating transformer_blocks.11.attn.to_out.0.input +25-08-28 16:35:20 | D | - Calibrating transformer_blocks.11.attn.to_add_out.input +25-08-28 16:35:20 | D | - Calibrating transformer_blocks.11.ff.net.0.proj.input +25-08-28 16:35:20 | D | - Calibrating transformer_blocks.11.ff.net.2.linear.input +25-08-28 16:35:20 | D | - Calibrating transformer_blocks.11.ff_context.net.0.proj.input +25-08-28 16:35:21 | D | - Calibrating transformer_blocks.11.ff_context.net.2.linear.input +25-08-28 16:35:21 | D | - Quantizing layer transformer_blocks.12 +25-08-28 16:35:21 | D | - Calibrating transformer_blocks.12.attn.to_q.input, transformer_blocks.12.attn.to_k.input, transformer_blocks.12.attn.to_v.input +25-08-28 16:35:21 | D | - Calibrating transformer_blocks.12.attn.add_q_proj.input, transformer_blocks.12.attn.add_k_proj.input, transformer_blocks.12.attn.add_v_proj.input +25-08-28 16:35:21 | D | - Calibrating transformer_blocks.12.attn.to_out.0.input +25-08-28 16:35:21 | D | - Calibrating transformer_blocks.12.attn.to_add_out.input +25-08-28 16:35:22 | D | - Calibrating transformer_blocks.12.ff.net.0.proj.input +25-08-28 16:35:22 | D | - Calibrating transformer_blocks.12.ff.net.2.linear.input +25-08-28 16:35:22 | D | - Calibrating transformer_blocks.12.ff_context.net.0.proj.input +25-08-28 16:35:22 | D | - Calibrating transformer_blocks.12.ff_context.net.2.linear.input +25-08-28 16:35:22 | D | - Quantizing layer transformer_blocks.13 +25-08-28 16:35:22 | D | - Calibrating transformer_blocks.13.attn.to_q.input, transformer_blocks.13.attn.to_k.input, transformer_blocks.13.attn.to_v.input +25-08-28 16:35:22 | D | - Calibrating transformer_blocks.13.attn.add_q_proj.input, transformer_blocks.13.attn.add_k_proj.input, transformer_blocks.13.attn.add_v_proj.input +25-08-28 16:35:23 | D | - Calibrating transformer_blocks.13.attn.to_out.0.input +25-08-28 16:35:23 | D | - Calibrating transformer_blocks.13.attn.to_add_out.input +25-08-28 16:35:23 | D | - Calibrating transformer_blocks.13.ff.net.0.proj.input +25-08-28 16:35:23 | D | - Calibrating transformer_blocks.13.ff.net.2.linear.input +25-08-28 16:35:23 | D | - Calibrating transformer_blocks.13.ff_context.net.0.proj.input +25-08-28 16:35:24 | D | - Calibrating transformer_blocks.13.ff_context.net.2.linear.input +25-08-28 16:35:24 | D | - Quantizing layer transformer_blocks.14 +25-08-28 16:35:24 | D | - Calibrating transformer_blocks.14.attn.to_q.input, transformer_blocks.14.attn.to_k.input, transformer_blocks.14.attn.to_v.input +25-08-28 16:35:24 | D | - Calibrating transformer_blocks.14.attn.add_q_proj.input, transformer_blocks.14.attn.add_k_proj.input, transformer_blocks.14.attn.add_v_proj.input +25-08-28 16:35:24 | D | - Calibrating transformer_blocks.14.attn.to_out.0.input +25-08-28 16:35:24 | D | - Calibrating transformer_blocks.14.attn.to_add_out.input +25-08-28 16:35:24 | D | - Calibrating transformer_blocks.14.ff.net.0.proj.input +25-08-28 16:35:25 | D | - Calibrating transformer_blocks.14.ff.net.2.linear.input +25-08-28 16:35:25 | D | - Calibrating transformer_blocks.14.ff_context.net.0.proj.input +25-08-28 16:35:25 | D | - Calibrating transformer_blocks.14.ff_context.net.2.linear.input +25-08-28 16:35:25 | D | - Quantizing layer transformer_blocks.15 +25-08-28 16:35:25 | D | - Calibrating transformer_blocks.15.attn.to_q.input, transformer_blocks.15.attn.to_k.input, transformer_blocks.15.attn.to_v.input +25-08-28 16:35:25 | D | - Calibrating transformer_blocks.15.attn.add_q_proj.input, transformer_blocks.15.attn.add_k_proj.input, transformer_blocks.15.attn.add_v_proj.input +25-08-28 16:35:26 | D | - Calibrating transformer_blocks.15.attn.to_out.0.input +25-08-28 16:35:26 | D | - Calibrating transformer_blocks.15.attn.to_add_out.input +25-08-28 16:35:26 | D | - Calibrating transformer_blocks.15.ff.net.0.proj.input +25-08-28 16:35:26 | D | - Calibrating transformer_blocks.15.ff.net.2.linear.input +25-08-28 16:35:26 | D | - Calibrating transformer_blocks.15.ff_context.net.0.proj.input +25-08-28 16:35:27 | D | - Calibrating transformer_blocks.15.ff_context.net.2.linear.input +25-08-28 16:35:27 | D | - Quantizing layer transformer_blocks.16 +25-08-28 16:35:27 | D | - Calibrating transformer_blocks.16.attn.to_q.input, transformer_blocks.16.attn.to_k.input, transformer_blocks.16.attn.to_v.input +25-08-28 16:35:27 | D | - Calibrating transformer_blocks.16.attn.add_q_proj.input, transformer_blocks.16.attn.add_k_proj.input, transformer_blocks.16.attn.add_v_proj.input +25-08-28 16:35:27 | D | - Calibrating transformer_blocks.16.attn.to_out.0.input +25-08-28 16:35:27 | D | - Calibrating transformer_blocks.16.attn.to_add_out.input +25-08-28 16:35:27 | D | - Calibrating transformer_blocks.16.ff.net.0.proj.input +25-08-28 16:35:28 | D | - Calibrating transformer_blocks.16.ff.net.2.linear.input +25-08-28 16:35:28 | D | - Calibrating transformer_blocks.16.ff_context.net.0.proj.input +25-08-28 16:35:28 | D | - Calibrating transformer_blocks.16.ff_context.net.2.linear.input +25-08-28 16:35:28 | D | - Quantizing layer transformer_blocks.17 +25-08-28 16:35:28 | D | - Calibrating transformer_blocks.17.attn.to_q.input, transformer_blocks.17.attn.to_k.input, transformer_blocks.17.attn.to_v.input +25-08-28 16:35:28 | D | - Calibrating transformer_blocks.17.attn.add_q_proj.input, transformer_blocks.17.attn.add_k_proj.input, transformer_blocks.17.attn.add_v_proj.input +25-08-28 16:35:29 | D | - Calibrating transformer_blocks.17.attn.to_out.0.input +25-08-28 16:35:29 | D | - Calibrating transformer_blocks.17.attn.to_add_out.input +25-08-28 16:35:29 | D | - Calibrating transformer_blocks.17.ff.net.0.proj.input +25-08-28 16:35:29 | D | - Calibrating transformer_blocks.17.ff.net.2.linear.input +25-08-28 16:35:29 | D | - Calibrating transformer_blocks.17.ff_context.net.0.proj.input +25-08-28 16:35:30 | D | - Calibrating transformer_blocks.17.ff_context.net.2.linear.input +25-08-28 16:35:30 | D | - Quantizing layer transformer_blocks.18 +25-08-28 16:35:30 | D | - Calibrating transformer_blocks.18.attn.to_q.input, transformer_blocks.18.attn.to_k.input, transformer_blocks.18.attn.to_v.input +25-08-28 16:35:30 | D | - Calibrating transformer_blocks.18.attn.add_q_proj.input, transformer_blocks.18.attn.add_k_proj.input, transformer_blocks.18.attn.add_v_proj.input +25-08-28 16:35:30 | D | - Calibrating transformer_blocks.18.attn.to_out.0.input +25-08-28 16:35:30 | D | - Calibrating transformer_blocks.18.attn.to_add_out.input +25-08-28 16:35:30 | D | - Calibrating transformer_blocks.18.ff.net.0.proj.input +25-08-28 16:35:31 | D | - Calibrating transformer_blocks.18.ff.net.2.linear.input +25-08-28 16:35:31 | D | - Calibrating transformer_blocks.18.ff_context.net.0.proj.input +25-08-28 16:35:31 | D | - Calibrating transformer_blocks.18.ff_context.net.2.linear.input +25-08-28 16:35:31 | D | - Quantizing layer single_transformer_blocks.0 +25-08-28 16:35:31 | D | - Calibrating single_transformer_blocks.0.attn.to_q.input, single_transformer_blocks.0.attn.to_k.input, single_transformer_blocks.0.attn.to_v.input, single_transformer_blocks.0.proj_mlp.input +25-08-28 16:35:31 | D | - Calibrating single_transformer_blocks.0.proj_out.linears.0.input +25-08-28 16:35:32 | D | - Calibrating single_transformer_blocks.0.proj_out.linears.1.linear.input +25-08-28 16:35:32 | D | - Quantizing layer single_transformer_blocks.1 +25-08-28 16:35:32 | D | - Calibrating single_transformer_blocks.1.attn.to_q.input, single_transformer_blocks.1.attn.to_k.input, single_transformer_blocks.1.attn.to_v.input, single_transformer_blocks.1.proj_mlp.input +25-08-28 16:35:32 | D | - Calibrating single_transformer_blocks.1.proj_out.linears.0.input +25-08-28 16:35:32 | D | - Calibrating single_transformer_blocks.1.proj_out.linears.1.linear.input +25-08-28 16:35:32 | D | - Quantizing layer single_transformer_blocks.2 +25-08-28 16:35:32 | D | - Calibrating single_transformer_blocks.2.attn.to_q.input, single_transformer_blocks.2.attn.to_k.input, single_transformer_blocks.2.attn.to_v.input, single_transformer_blocks.2.proj_mlp.input +25-08-28 16:35:33 | D | - Calibrating single_transformer_blocks.2.proj_out.linears.0.input +25-08-28 16:35:33 | D | - Calibrating single_transformer_blocks.2.proj_out.linears.1.linear.input +25-08-28 16:35:33 | D | - Quantizing layer single_transformer_blocks.3 +25-08-28 16:35:33 | D | - Calibrating single_transformer_blocks.3.attn.to_q.input, single_transformer_blocks.3.attn.to_k.input, single_transformer_blocks.3.attn.to_v.input, single_transformer_blocks.3.proj_mlp.input +25-08-28 16:35:33 | D | - Calibrating single_transformer_blocks.3.proj_out.linears.0.input +25-08-28 16:35:33 | D | - Calibrating single_transformer_blocks.3.proj_out.linears.1.linear.input +25-08-28 16:35:33 | D | - Quantizing layer single_transformer_blocks.4 +25-08-28 16:35:33 | D | - Calibrating single_transformer_blocks.4.attn.to_q.input, single_transformer_blocks.4.attn.to_k.input, single_transformer_blocks.4.attn.to_v.input, single_transformer_blocks.4.proj_mlp.input +25-08-28 16:35:34 | D | - Calibrating single_transformer_blocks.4.proj_out.linears.0.input +25-08-28 16:35:34 | D | - Calibrating single_transformer_blocks.4.proj_out.linears.1.linear.input +25-08-28 16:35:34 | D | - Quantizing layer single_transformer_blocks.5 +25-08-28 16:35:34 | D | - Calibrating single_transformer_blocks.5.attn.to_q.input, single_transformer_blocks.5.attn.to_k.input, single_transformer_blocks.5.attn.to_v.input, single_transformer_blocks.5.proj_mlp.input +25-08-28 16:35:34 | D | - Calibrating single_transformer_blocks.5.proj_out.linears.0.input +25-08-28 16:35:34 | D | - Calibrating single_transformer_blocks.5.proj_out.linears.1.linear.input +25-08-28 16:35:35 | D | - Quantizing layer single_transformer_blocks.6 +25-08-28 16:35:35 | D | - Calibrating single_transformer_blocks.6.attn.to_q.input, single_transformer_blocks.6.attn.to_k.input, single_transformer_blocks.6.attn.to_v.input, single_transformer_blocks.6.proj_mlp.input +25-08-28 16:35:35 | D | - Calibrating single_transformer_blocks.6.proj_out.linears.0.input +25-08-28 16:35:35 | D | - Calibrating single_transformer_blocks.6.proj_out.linears.1.linear.input +25-08-28 16:35:35 | D | - Quantizing layer single_transformer_blocks.7 +25-08-28 16:35:35 | D | - Calibrating single_transformer_blocks.7.attn.to_q.input, single_transformer_blocks.7.attn.to_k.input, single_transformer_blocks.7.attn.to_v.input, single_transformer_blocks.7.proj_mlp.input +25-08-28 16:35:35 | D | - Calibrating single_transformer_blocks.7.proj_out.linears.0.input +25-08-28 16:35:36 | D | - Calibrating single_transformer_blocks.7.proj_out.linears.1.linear.input +25-08-28 16:35:36 | D | - Quantizing layer single_transformer_blocks.8 +25-08-28 16:35:36 | D | - Calibrating single_transformer_blocks.8.attn.to_q.input, single_transformer_blocks.8.attn.to_k.input, single_transformer_blocks.8.attn.to_v.input, single_transformer_blocks.8.proj_mlp.input +25-08-28 16:35:36 | D | - Calibrating single_transformer_blocks.8.proj_out.linears.0.input +25-08-28 16:35:36 | D | - Calibrating single_transformer_blocks.8.proj_out.linears.1.linear.input +25-08-28 16:35:36 | D | - Quantizing layer single_transformer_blocks.9 +25-08-28 16:35:36 | D | - Calibrating single_transformer_blocks.9.attn.to_q.input, single_transformer_blocks.9.attn.to_k.input, single_transformer_blocks.9.attn.to_v.input, single_transformer_blocks.9.proj_mlp.input +25-08-28 16:35:36 | D | - Calibrating single_transformer_blocks.9.proj_out.linears.0.input +25-08-28 16:35:37 | D | - Calibrating single_transformer_blocks.9.proj_out.linears.1.linear.input +25-08-28 16:35:37 | D | - Quantizing layer single_transformer_blocks.10 +25-08-28 16:35:37 | D | - Calibrating single_transformer_blocks.10.attn.to_q.input, single_transformer_blocks.10.attn.to_k.input, single_transformer_blocks.10.attn.to_v.input, single_transformer_blocks.10.proj_mlp.input +25-08-28 16:35:37 | D | - Calibrating single_transformer_blocks.10.proj_out.linears.0.input +25-08-28 16:35:37 | D | - Calibrating single_transformer_blocks.10.proj_out.linears.1.linear.input +25-08-28 16:35:37 | D | - Quantizing layer single_transformer_blocks.11 +25-08-28 16:35:37 | D | - Calibrating single_transformer_blocks.11.attn.to_q.input, single_transformer_blocks.11.attn.to_k.input, single_transformer_blocks.11.attn.to_v.input, single_transformer_blocks.11.proj_mlp.input +25-08-28 16:35:38 | D | - Calibrating single_transformer_blocks.11.proj_out.linears.0.input +25-08-28 16:35:38 | D | - Calibrating single_transformer_blocks.11.proj_out.linears.1.linear.input +25-08-28 16:35:38 | D | - Quantizing layer single_transformer_blocks.12 +25-08-28 16:35:38 | D | - Calibrating single_transformer_blocks.12.attn.to_q.input, single_transformer_blocks.12.attn.to_k.input, single_transformer_blocks.12.attn.to_v.input, single_transformer_blocks.12.proj_mlp.input +25-08-28 16:35:38 | D | - Calibrating single_transformer_blocks.12.proj_out.linears.0.input +25-08-28 16:35:38 | D | - Calibrating single_transformer_blocks.12.proj_out.linears.1.linear.input +25-08-28 16:35:38 | D | - Quantizing layer single_transformer_blocks.13 +25-08-28 16:35:38 | D | - Calibrating single_transformer_blocks.13.attn.to_q.input, single_transformer_blocks.13.attn.to_k.input, single_transformer_blocks.13.attn.to_v.input, single_transformer_blocks.13.proj_mlp.input +25-08-28 16:35:39 | D | - Calibrating single_transformer_blocks.13.proj_out.linears.0.input +25-08-28 16:35:39 | D | - Calibrating single_transformer_blocks.13.proj_out.linears.1.linear.input +25-08-28 16:35:39 | D | - Quantizing layer single_transformer_blocks.14 +25-08-28 16:35:39 | D | - Calibrating single_transformer_blocks.14.attn.to_q.input, single_transformer_blocks.14.attn.to_k.input, single_transformer_blocks.14.attn.to_v.input, single_transformer_blocks.14.proj_mlp.input +25-08-28 16:35:39 | D | - Calibrating single_transformer_blocks.14.proj_out.linears.0.input +25-08-28 16:35:39 | D | - Calibrating single_transformer_blocks.14.proj_out.linears.1.linear.input +25-08-28 16:35:40 | D | - Quantizing layer single_transformer_blocks.15 +25-08-28 16:35:40 | D | - Calibrating single_transformer_blocks.15.attn.to_q.input, single_transformer_blocks.15.attn.to_k.input, single_transformer_blocks.15.attn.to_v.input, single_transformer_blocks.15.proj_mlp.input +25-08-28 16:35:40 | D | - Calibrating single_transformer_blocks.15.proj_out.linears.0.input +25-08-28 16:35:40 | D | - Calibrating single_transformer_blocks.15.proj_out.linears.1.linear.input +25-08-28 16:35:40 | D | - Quantizing layer single_transformer_blocks.16 +25-08-28 16:35:40 | D | - Calibrating single_transformer_blocks.16.attn.to_q.input, single_transformer_blocks.16.attn.to_k.input, single_transformer_blocks.16.attn.to_v.input, single_transformer_blocks.16.proj_mlp.input +25-08-28 16:35:40 | D | - Calibrating single_transformer_blocks.16.proj_out.linears.0.input +25-08-28 16:35:41 | D | - Calibrating single_transformer_blocks.16.proj_out.linears.1.linear.input +25-08-28 16:35:41 | D | - Quantizing layer single_transformer_blocks.17 +25-08-28 16:35:41 | D | - Calibrating single_transformer_blocks.17.attn.to_q.input, single_transformer_blocks.17.attn.to_k.input, single_transformer_blocks.17.attn.to_v.input, single_transformer_blocks.17.proj_mlp.input +25-08-28 16:35:41 | D | - Calibrating single_transformer_blocks.17.proj_out.linears.0.input +25-08-28 16:35:41 | D | - Calibrating single_transformer_blocks.17.proj_out.linears.1.linear.input +25-08-28 16:35:41 | D | - Quantizing layer single_transformer_blocks.18 +25-08-28 16:35:41 | D | - Calibrating single_transformer_blocks.18.attn.to_q.input, single_transformer_blocks.18.attn.to_k.input, single_transformer_blocks.18.attn.to_v.input, single_transformer_blocks.18.proj_mlp.input +25-08-28 16:35:41 | D | - Calibrating single_transformer_blocks.18.proj_out.linears.0.input +25-08-28 16:35:42 | D | - Calibrating single_transformer_blocks.18.proj_out.linears.1.linear.input +25-08-28 16:35:42 | D | - Quantizing layer single_transformer_blocks.19 +25-08-28 16:35:42 | D | - Calibrating single_transformer_blocks.19.attn.to_q.input, single_transformer_blocks.19.attn.to_k.input, single_transformer_blocks.19.attn.to_v.input, single_transformer_blocks.19.proj_mlp.input +25-08-28 16:35:42 | D | - Calibrating single_transformer_blocks.19.proj_out.linears.0.input +25-08-28 16:35:42 | D | - Calibrating single_transformer_blocks.19.proj_out.linears.1.linear.input +25-08-28 16:35:42 | D | - Quantizing layer single_transformer_blocks.20 +25-08-28 16:35:42 | D | - Calibrating single_transformer_blocks.20.attn.to_q.input, single_transformer_blocks.20.attn.to_k.input, single_transformer_blocks.20.attn.to_v.input, single_transformer_blocks.20.proj_mlp.input +25-08-28 16:35:43 | D | - Calibrating single_transformer_blocks.20.proj_out.linears.0.input +25-08-28 16:35:43 | D | - Calibrating single_transformer_blocks.20.proj_out.linears.1.linear.input +25-08-28 16:35:43 | D | - Quantizing layer single_transformer_blocks.21 +25-08-28 16:35:43 | D | - Calibrating single_transformer_blocks.21.attn.to_q.input, single_transformer_blocks.21.attn.to_k.input, single_transformer_blocks.21.attn.to_v.input, single_transformer_blocks.21.proj_mlp.input +25-08-28 16:35:43 | D | - Calibrating single_transformer_blocks.21.proj_out.linears.0.input +25-08-28 16:35:43 | D | - Calibrating single_transformer_blocks.21.proj_out.linears.1.linear.input +25-08-28 16:35:43 | D | - Quantizing layer single_transformer_blocks.22 +25-08-28 16:35:43 | D | - Calibrating single_transformer_blocks.22.attn.to_q.input, single_transformer_blocks.22.attn.to_k.input, single_transformer_blocks.22.attn.to_v.input, single_transformer_blocks.22.proj_mlp.input +25-08-28 16:35:44 | D | - Calibrating single_transformer_blocks.22.proj_out.linears.0.input +25-08-28 16:35:44 | D | - Calibrating single_transformer_blocks.22.proj_out.linears.1.linear.input +25-08-28 16:35:44 | D | - Quantizing layer single_transformer_blocks.23 +25-08-28 16:35:44 | D | - Calibrating single_transformer_blocks.23.attn.to_q.input, single_transformer_blocks.23.attn.to_k.input, single_transformer_blocks.23.attn.to_v.input, single_transformer_blocks.23.proj_mlp.input +25-08-28 16:35:44 | D | - Calibrating single_transformer_blocks.23.proj_out.linears.0.input +25-08-28 16:35:44 | D | - Calibrating single_transformer_blocks.23.proj_out.linears.1.linear.input +25-08-28 16:35:45 | D | - Quantizing layer single_transformer_blocks.24 +25-08-28 16:35:45 | D | - Calibrating single_transformer_blocks.24.attn.to_q.input, single_transformer_blocks.24.attn.to_k.input, single_transformer_blocks.24.attn.to_v.input, single_transformer_blocks.24.proj_mlp.input +25-08-28 16:35:45 | D | - Calibrating single_transformer_blocks.24.proj_out.linears.0.input +25-08-28 16:35:45 | D | - Calibrating single_transformer_blocks.24.proj_out.linears.1.linear.input +25-08-28 16:35:45 | D | - Quantizing layer single_transformer_blocks.25 +25-08-28 16:35:45 | D | - Calibrating single_transformer_blocks.25.attn.to_q.input, single_transformer_blocks.25.attn.to_k.input, single_transformer_blocks.25.attn.to_v.input, single_transformer_blocks.25.proj_mlp.input +25-08-28 16:35:45 | D | - Calibrating single_transformer_blocks.25.proj_out.linears.0.input +25-08-28 16:35:46 | D | - Calibrating single_transformer_blocks.25.proj_out.linears.1.linear.input +25-08-28 16:35:46 | D | - Quantizing layer single_transformer_blocks.26 +25-08-28 16:35:46 | D | - Calibrating single_transformer_blocks.26.attn.to_q.input, single_transformer_blocks.26.attn.to_k.input, single_transformer_blocks.26.attn.to_v.input, single_transformer_blocks.26.proj_mlp.input +25-08-28 16:35:46 | D | - Calibrating single_transformer_blocks.26.proj_out.linears.0.input +25-08-28 16:35:46 | D | - Calibrating single_transformer_blocks.26.proj_out.linears.1.linear.input +25-08-28 16:35:46 | D | - Quantizing layer single_transformer_blocks.27 +25-08-28 16:35:46 | D | - Calibrating single_transformer_blocks.27.attn.to_q.input, single_transformer_blocks.27.attn.to_k.input, single_transformer_blocks.27.attn.to_v.input, single_transformer_blocks.27.proj_mlp.input +25-08-28 16:35:46 | D | - Calibrating single_transformer_blocks.27.proj_out.linears.0.input +25-08-28 16:35:47 | D | - Calibrating single_transformer_blocks.27.proj_out.linears.1.linear.input +25-08-28 16:35:47 | D | - Quantizing layer single_transformer_blocks.28 +25-08-28 16:35:47 | D | - Calibrating single_transformer_blocks.28.attn.to_q.input, single_transformer_blocks.28.attn.to_k.input, single_transformer_blocks.28.attn.to_v.input, single_transformer_blocks.28.proj_mlp.input +25-08-28 16:35:47 | D | - Calibrating single_transformer_blocks.28.proj_out.linears.0.input +25-08-28 16:35:47 | D | - Calibrating single_transformer_blocks.28.proj_out.linears.1.linear.input +25-08-28 16:35:47 | D | - Quantizing layer single_transformer_blocks.29 +25-08-28 16:35:47 | D | - Calibrating single_transformer_blocks.29.attn.to_q.input, single_transformer_blocks.29.attn.to_k.input, single_transformer_blocks.29.attn.to_v.input, single_transformer_blocks.29.proj_mlp.input +25-08-28 16:35:48 | D | - Calibrating single_transformer_blocks.29.proj_out.linears.0.input +25-08-28 16:35:48 | D | - Calibrating single_transformer_blocks.29.proj_out.linears.1.linear.input +25-08-28 16:35:48 | D | - Quantizing layer single_transformer_blocks.30 +25-08-28 16:35:48 | D | - Calibrating single_transformer_blocks.30.attn.to_q.input, single_transformer_blocks.30.attn.to_k.input, single_transformer_blocks.30.attn.to_v.input, single_transformer_blocks.30.proj_mlp.input +25-08-28 16:35:48 | D | - Calibrating single_transformer_blocks.30.proj_out.linears.0.input +25-08-28 16:35:48 | D | - Calibrating single_transformer_blocks.30.proj_out.linears.1.linear.input +25-08-28 16:35:49 | D | - Quantizing layer single_transformer_blocks.31 +25-08-28 16:35:49 | D | - Calibrating single_transformer_blocks.31.attn.to_q.input, single_transformer_blocks.31.attn.to_k.input, single_transformer_blocks.31.attn.to_v.input, single_transformer_blocks.31.proj_mlp.input +25-08-28 16:35:49 | D | - Calibrating single_transformer_blocks.31.proj_out.linears.0.input +25-08-28 16:35:49 | D | - Calibrating single_transformer_blocks.31.proj_out.linears.1.linear.input +25-08-28 16:35:49 | D | - Quantizing layer single_transformer_blocks.32 +25-08-28 16:35:49 | D | - Calibrating single_transformer_blocks.32.attn.to_q.input, single_transformer_blocks.32.attn.to_k.input, single_transformer_blocks.32.attn.to_v.input, single_transformer_blocks.32.proj_mlp.input +25-08-28 16:35:49 | D | - Calibrating single_transformer_blocks.32.proj_out.linears.0.input +25-08-28 16:35:49 | D | - Calibrating single_transformer_blocks.32.proj_out.linears.1.linear.input +25-08-28 16:35:50 | D | - Quantizing layer single_transformer_blocks.33 +25-08-28 16:35:50 | D | - Calibrating single_transformer_blocks.33.attn.to_q.input, single_transformer_blocks.33.attn.to_k.input, single_transformer_blocks.33.attn.to_v.input, single_transformer_blocks.33.proj_mlp.input +25-08-28 16:35:50 | D | - Calibrating single_transformer_blocks.33.proj_out.linears.0.input +25-08-28 16:35:50 | D | - Calibrating single_transformer_blocks.33.proj_out.linears.1.linear.input +25-08-28 16:35:50 | D | - Quantizing layer single_transformer_blocks.34 +25-08-28 16:35:50 | D | - Calibrating single_transformer_blocks.34.attn.to_q.input, single_transformer_blocks.34.attn.to_k.input, single_transformer_blocks.34.attn.to_v.input, single_transformer_blocks.34.proj_mlp.input +25-08-28 16:35:50 | D | - Calibrating single_transformer_blocks.34.proj_out.linears.0.input +25-08-28 16:35:51 | D | - Calibrating single_transformer_blocks.34.proj_out.linears.1.linear.input +25-08-28 16:35:51 | D | - Quantizing layer single_transformer_blocks.35 +25-08-28 16:35:51 | D | - Calibrating single_transformer_blocks.35.attn.to_q.input, single_transformer_blocks.35.attn.to_k.input, single_transformer_blocks.35.attn.to_v.input, single_transformer_blocks.35.proj_mlp.input +25-08-28 16:35:51 | D | - Calibrating single_transformer_blocks.35.proj_out.linears.0.input +25-08-28 16:35:51 | D | - Calibrating single_transformer_blocks.35.proj_out.linears.1.linear.input +25-08-28 16:35:51 | D | - Quantizing layer single_transformer_blocks.36 +25-08-28 16:35:51 | D | - Calibrating single_transformer_blocks.36.attn.to_q.input, single_transformer_blocks.36.attn.to_k.input, single_transformer_blocks.36.attn.to_v.input, single_transformer_blocks.36.proj_mlp.input +25-08-28 16:35:52 | D | - Calibrating single_transformer_blocks.36.proj_out.linears.0.input +25-08-28 16:35:52 | D | - Calibrating single_transformer_blocks.36.proj_out.linears.1.linear.input +25-08-28 16:35:52 | D | - Quantizing layer single_transformer_blocks.37 +25-08-28 16:35:52 | D | - Calibrating single_transformer_blocks.37.attn.to_q.input, single_transformer_blocks.37.attn.to_k.input, single_transformer_blocks.37.attn.to_v.input, single_transformer_blocks.37.proj_mlp.input +25-08-28 16:35:52 | D | - Calibrating single_transformer_blocks.37.proj_out.linears.0.input +25-08-28 16:35:52 | D | - Calibrating single_transformer_blocks.37.proj_out.linears.1.linear.input +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.0.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.0.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.0.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.0.attn.add_q_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.0.attn.add_k_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.0.attn.add_v_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.0.attn.to_out.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.0.attn.to_add_out (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.0.ff.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.0.ff.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.0.ff_context.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.0.ff_context.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.1.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.1.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.1.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.1.attn.add_q_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.1.attn.add_k_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.1.attn.add_v_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.1.attn.to_out.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.1.attn.to_add_out (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.1.ff.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.1.ff.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.1.ff_context.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.1.ff_context.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.2.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.2.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.2.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.2.attn.add_q_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.2.attn.add_k_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.2.attn.add_v_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.2.attn.to_out.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.2.attn.to_add_out (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.2.ff.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.2.ff.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.2.ff_context.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.2.ff_context.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.3.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.3.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.3.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.3.attn.add_q_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.3.attn.add_k_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.3.attn.add_v_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.3.attn.to_out.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.3.attn.to_add_out (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.3.ff.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.3.ff.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.3.ff_context.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.3.ff_context.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.4.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.4.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.4.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.4.attn.add_q_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.4.attn.add_k_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.4.attn.add_v_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.4.attn.to_out.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.4.attn.to_add_out (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.4.ff.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.4.ff.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.4.ff_context.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.4.ff_context.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.5.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.5.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.5.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.5.attn.add_q_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.5.attn.add_k_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.5.attn.add_v_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.5.attn.to_out.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.5.attn.to_add_out (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.5.ff.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.5.ff.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.5.ff_context.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.5.ff_context.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.6.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.6.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.6.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.6.attn.add_q_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.6.attn.add_k_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.6.attn.add_v_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.6.attn.to_out.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.6.attn.to_add_out (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.6.ff.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.6.ff.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.6.ff_context.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.6.ff_context.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.7.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.7.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.7.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.7.attn.add_q_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.7.attn.add_k_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.7.attn.add_v_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.7.attn.to_out.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.7.attn.to_add_out (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.7.ff.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.7.ff.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.7.ff_context.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.7.ff_context.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.8.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.8.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.8.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.8.attn.add_q_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.8.attn.add_k_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.8.attn.add_v_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.8.attn.to_out.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.8.attn.to_add_out (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.8.ff.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.8.ff.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.8.ff_context.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.8.ff_context.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.9.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.9.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.9.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.9.attn.add_q_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.9.attn.add_k_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.9.attn.add_v_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.9.attn.to_out.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.9.attn.to_add_out (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.9.ff.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.9.ff.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.9.ff_context.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.9.ff_context.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.10.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.10.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.10.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.10.attn.add_q_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.10.attn.add_k_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.10.attn.add_v_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.10.attn.to_out.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.10.attn.to_add_out (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.10.ff.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.10.ff.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.10.ff_context.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.10.ff_context.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.11.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.11.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.11.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.11.attn.add_q_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.11.attn.add_k_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.11.attn.add_v_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.11.attn.to_out.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.11.attn.to_add_out (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.11.ff.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.11.ff.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.11.ff_context.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.11.ff_context.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.12.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.12.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.12.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.12.attn.add_q_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.12.attn.add_k_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.12.attn.add_v_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.12.attn.to_out.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.12.attn.to_add_out (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.12.ff.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.12.ff.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.12.ff_context.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.12.ff_context.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.13.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.13.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.13.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.13.attn.add_q_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.13.attn.add_k_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.13.attn.add_v_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.13.attn.to_out.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.13.attn.to_add_out (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.13.ff.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.13.ff.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.13.ff_context.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.13.ff_context.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.14.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.14.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.14.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.14.attn.add_q_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.14.attn.add_k_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.14.attn.add_v_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.14.attn.to_out.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.14.attn.to_add_out (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.14.ff.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.14.ff.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.14.ff_context.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.14.ff_context.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.15.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.15.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.15.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.15.attn.add_q_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.15.attn.add_k_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.15.attn.add_v_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.15.attn.to_out.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.15.attn.to_add_out (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.15.ff.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.15.ff.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.15.ff_context.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.15.ff_context.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.16.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.16.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.16.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.16.attn.add_q_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.16.attn.add_k_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.16.attn.add_v_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.16.attn.to_out.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.16.attn.to_add_out (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.16.ff.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.16.ff.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.16.ff_context.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.16.ff_context.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.17.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.17.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.17.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.17.attn.add_q_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.17.attn.add_k_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.17.attn.add_v_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.17.attn.to_out.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.17.attn.to_add_out (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.17.ff.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.17.ff.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.17.ff_context.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.17.ff_context.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.18.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.18.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.18.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.18.attn.add_q_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.18.attn.add_k_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.18.attn.add_v_proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.18.attn.to_out.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.18.attn.to_add_out (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.18.ff.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.18.ff.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.18.ff_context.net.0.proj (inputs) +25-08-28 16:35:52 | D | - Quantizing transformer_blocks.18.ff_context.net.2.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.0.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.0.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.0.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.0.proj_out.linears.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.0.proj_mlp (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.0.proj_out.linears.1.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.1.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.1.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.1.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.1.proj_out.linears.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.1.proj_mlp (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.1.proj_out.linears.1.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.2.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.2.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.2.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.2.proj_out.linears.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.2.proj_mlp (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.2.proj_out.linears.1.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.3.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.3.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.3.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.3.proj_out.linears.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.3.proj_mlp (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.3.proj_out.linears.1.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.4.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.4.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.4.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.4.proj_out.linears.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.4.proj_mlp (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.4.proj_out.linears.1.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.5.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.5.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.5.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.5.proj_out.linears.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.5.proj_mlp (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.5.proj_out.linears.1.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.6.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.6.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.6.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.6.proj_out.linears.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.6.proj_mlp (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.6.proj_out.linears.1.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.7.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.7.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.7.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.7.proj_out.linears.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.7.proj_mlp (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.7.proj_out.linears.1.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.8.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.8.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.8.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.8.proj_out.linears.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.8.proj_mlp (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.8.proj_out.linears.1.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.9.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.9.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.9.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.9.proj_out.linears.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.9.proj_mlp (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.9.proj_out.linears.1.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.10.attn.to_q (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.10.attn.to_k (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.10.attn.to_v (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.10.proj_out.linears.0 (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.10.proj_mlp (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.10.proj_out.linears.1.linear (inputs) +25-08-28 16:35:52 | D | - Quantizing single_transformer_blocks.11.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.11.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.11.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.11.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.11.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.11.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.12.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.12.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.12.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.12.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.12.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.12.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.13.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.13.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.13.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.13.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.13.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.13.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.14.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.14.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.14.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.14.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.14.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.14.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.15.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.15.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.15.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.15.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.15.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.15.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.16.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.16.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.16.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.16.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.16.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.16.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.17.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.17.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.17.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.17.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.17.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.17.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.18.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.18.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.18.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.18.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.18.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.18.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.19.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.19.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.19.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.19.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.19.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.19.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.20.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.20.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.20.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.20.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.20.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.20.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.21.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.21.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.21.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.21.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.21.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.21.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.22.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.22.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.22.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.22.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.22.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.22.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.23.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.23.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.23.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.23.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.23.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.23.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.24.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.24.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.24.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.24.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.24.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.24.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.25.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.25.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.25.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.25.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.25.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.25.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.26.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.26.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.26.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.26.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.26.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.26.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.27.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.27.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.27.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.27.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.27.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.27.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.28.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.28.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.28.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.28.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.28.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.28.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.29.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.29.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.29.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.29.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.29.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.29.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.30.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.30.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.30.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.30.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.30.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.30.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.31.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.31.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.31.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.31.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.31.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.31.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.32.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.32.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.32.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.32.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.32.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.32.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.33.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.33.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.33.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.33.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.33.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.33.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.34.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.34.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.34.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.34.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.34.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.34.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.35.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.35.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.35.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.35.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.35.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.35.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.36.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.36.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.36.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.36.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.36.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.36.proj_out.linears.1.linear (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.37.attn.to_q (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.37.attn.to_k (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.37.attn.to_v (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.37.proj_out.linears.0 (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.37.proj_mlp (inputs) +25-08-28 16:35:53 | D | - Quantizing single_transformer_blocks.37.proj_out.linears.1.linear (inputs)