jerryzh168 committed on
Commit
44b9297
·
verified ·
1 Parent(s): b5a2f00

Upload OPTForCausalLM

Browse files
Files changed (2) hide show
  1. config.json +162 -1
  2. pytorch_model.bin +2 -2
config.json CHANGED
@@ -146,7 +146,116 @@
146
  "_type": "Float8DynamicActivationFloat8WeightConfig",
147
  "_version": 2
148
  },
149
- "model.decoder.layers.*.self_attn.v_proj": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  "model.decoder.layers.3.self_attn.q_proj": {
151
  "_data": {
152
  "group_size": 128,
@@ -172,6 +281,58 @@
172
  },
173
  "_type": "Int4WeightOnlyConfig",
174
  "_version": 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  }
176
  }
177
  },
 
146
  "_type": "Float8DynamicActivationFloat8WeightConfig",
147
  "_version": 2
148
  },
149
+ "model.decoder.layers.*.self_attn.qkv_proj": {
150
+ "_data": {
151
+ "activation_dtype": {
152
+ "_data": "float8_e4m3fn",
153
+ "_type": "torch.dtype"
154
+ },
155
+ "activation_value_lb": null,
156
+ "activation_value_ub": null,
157
+ "granularity": [
158
+ {
159
+ "_data": {},
160
+ "_type": "PerRow",
161
+ "_version": 1
162
+ },
163
+ {
164
+ "_data": {},
165
+ "_type": "PerRow",
166
+ "_version": 1
167
+ }
168
+ ],
169
+ "kernel_preference": {
170
+ "_data": "AUTO",
171
+ "_type": "KernelPreference"
172
+ },
173
+ "mm_config": {
174
+ "_data": {
175
+ "emulate": false,
176
+ "pad_inner_dim": false,
177
+ "use_fast_accum": true
178
+ },
179
+ "_type": "Float8MMConfig",
180
+ "_version": 1
181
+ },
182
+ "set_inductor_config": true,
183
+ "weight_dtype": {
184
+ "_data": "float8_e4m3fn",
185
+ "_type": "torch.dtype"
186
+ }
187
+ },
188
+ "_type": "Float8DynamicActivationFloat8WeightConfig",
189
+ "_version": 2
190
+ },
191
+ "model.decoder.layers.*.self_attn.v_proj": {
192
+ "_data": {
193
+ "activation_dtype": {
194
+ "_data": "float8_e4m3fn",
195
+ "_type": "torch.dtype"
196
+ },
197
+ "activation_value_lb": null,
198
+ "activation_value_ub": null,
199
+ "granularity": [
200
+ {
201
+ "_data": {},
202
+ "_type": "PerRow",
203
+ "_version": 1
204
+ },
205
+ {
206
+ "_data": {},
207
+ "_type": "PerRow",
208
+ "_version": 1
209
+ }
210
+ ],
211
+ "kernel_preference": {
212
+ "_data": "AUTO",
213
+ "_type": "KernelPreference"
214
+ },
215
+ "mm_config": {
216
+ "_data": {
217
+ "emulate": false,
218
+ "pad_inner_dim": false,
219
+ "use_fast_accum": true
220
+ },
221
+ "_type": "Float8MMConfig",
222
+ "_version": 1
223
+ },
224
+ "set_inductor_config": true,
225
+ "weight_dtype": {
226
+ "_data": "float8_e4m3fn",
227
+ "_type": "torch.dtype"
228
+ }
229
+ },
230
+ "_type": "Float8DynamicActivationFloat8WeightConfig",
231
+ "_version": 2
232
+ },
233
+ "model.decoder.layers.3.self_attn.k_proj": {
234
+ "_data": {
235
+ "group_size": 128,
236
+ "int4_choose_qparams_algorithm": {
237
+ "_data": "TINYGEMM",
238
+ "_type": "Int4ChooseQParamsAlgorithm"
239
+ },
240
+ "int4_packing_format": "tile_packed_to_4d",
241
+ "layout": {
242
+ "_data": {
243
+ "inner_k_tiles": 8
244
+ },
245
+ "_type": "TensorCoreTiledLayout",
246
+ "_version": 1
247
+ },
248
+ "preserve_zero": null,
249
+ "set_inductor_config": true,
250
+ "use_hqq": false,
251
+ "zero_point_domain": {
252
+ "_data": "NONE",
253
+ "_type": "ZeroPointDomain"
254
+ }
255
+ },
256
+ "_type": "Int4WeightOnlyConfig",
257
+ "_version": 2
258
+ },
259
  "model.decoder.layers.3.self_attn.q_proj": {
260
  "_data": {
261
  "group_size": 128,
 
281
  },
282
  "_type": "Int4WeightOnlyConfig",
283
  "_version": 2
284
+ },
285
+ "model.decoder.layers.3.self_attn.qkv_proj": {
286
+ "_data": {
287
+ "group_size": 128,
288
+ "int4_choose_qparams_algorithm": {
289
+ "_data": "TINYGEMM",
290
+ "_type": "Int4ChooseQParamsAlgorithm"
291
+ },
292
+ "int4_packing_format": "tile_packed_to_4d",
293
+ "layout": {
294
+ "_data": {
295
+ "inner_k_tiles": 8
296
+ },
297
+ "_type": "TensorCoreTiledLayout",
298
+ "_version": 1
299
+ },
300
+ "preserve_zero": null,
301
+ "set_inductor_config": true,
302
+ "use_hqq": false,
303
+ "zero_point_domain": {
304
+ "_data": "NONE",
305
+ "_type": "ZeroPointDomain"
306
+ }
307
+ },
308
+ "_type": "Int4WeightOnlyConfig",
309
+ "_version": 2
310
+ },
311
+ "model.decoder.layers.3.self_attn.v_proj": {
312
+ "_data": {
313
+ "group_size": 128,
314
+ "int4_choose_qparams_algorithm": {
315
+ "_data": "TINYGEMM",
316
+ "_type": "Int4ChooseQParamsAlgorithm"
317
+ },
318
+ "int4_packing_format": "tile_packed_to_4d",
319
+ "layout": {
320
+ "_data": {
321
+ "inner_k_tiles": 8
322
+ },
323
+ "_type": "TensorCoreTiledLayout",
324
+ "_version": 1
325
+ },
326
+ "preserve_zero": null,
327
+ "set_inductor_config": true,
328
+ "use_hqq": false,
329
+ "zero_point_domain": {
330
+ "_data": "NONE",
331
+ "_type": "ZeroPointDomain"
332
+ }
333
+ },
334
+ "_type": "Int4WeightOnlyConfig",
335
+ "_version": 2
336
  }
337
  }
338
  },
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fcc8b6a9f362d0889f1bed6f31fa75d2ae0f4940ddc4f9b94a0de75200697db
3
- size 172790515
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba0b5936d6428aeb848faa96c1a37bb8b3bb549620c7213d5bba28374782aff3
3
+ size 165404707