inferencerlabs committed on
Commit 8a4f534 · verified · 1 Parent(s): e9acc21

Upload model file

Files changed (1)
  1. config.json +607 -0
config.json ADDED
@@ -0,0 +1,607 @@
+ {
+   "architectures": [
+     "MiniMaxM2ForCausalLM"
+   ],
+   "attn_type_list": [
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1,
+     1
+   ],
+   "auto_map": {
+     "AutoConfig": "configuration_minimax_m2.MiniMaxM2Config",
+     "AutoModelForCausalLM": "modeling_minimax_m2.MiniMaxM2ForCausalLM"
+   },
+   "eos_token_id": 200020,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 3072,
+   "intermediate_size": 1536,
+   "max_position_embeddings": 196608,
+   "mlx-sanitized": "0.30.7",
+   "model_type": "minimax_m2",
+   "mtp_transformer_layers": 1,
+   "num_attention_heads": 48,
+   "num_experts_per_tok": 8,
+   "num_hidden_layers": 62,
+   "num_key_value_heads": 8,
+   "num_local_experts": 256,
+   "num_mtp_modules": 3,
+   "qk_norm_type": "per_layer",
+   "quantization": {
+     "group_size": 64,
+     "bits": 6,
+     "mode": "affine",
+     "model.layers.0.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.1.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.2.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.3.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.4.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.5.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.6.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.7.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.8.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.9.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.10.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.11.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.12.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.13.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.14.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.15.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.16.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.17.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.18.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.19.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.20.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.21.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.22.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.23.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.24.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.25.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.26.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.27.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.28.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.29.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.30.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.31.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.32.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.33.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.34.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.35.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.36.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.37.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.38.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.39.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.40.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.41.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.42.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.43.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.44.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.45.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.46.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.47.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.48.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.49.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.50.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.51.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.52.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.53.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.54.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.55.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.56.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.57.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.58.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.59.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.60.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.61.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     }
+   },
+   "quantization_config": {
+     "group_size": 64,
+     "bits": 6,
+     "mode": "affine",
+     "model.layers.0.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.1.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.2.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.3.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.4.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.5.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.6.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.7.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.8.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.9.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.10.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.11.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.12.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.13.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.14.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.15.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.16.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.17.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.18.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.19.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.20.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.21.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.22.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.23.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.24.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.25.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.26.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.27.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.28.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.29.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.30.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.31.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.32.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.33.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.34.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.35.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.36.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.37.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.38.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.39.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.40.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.41.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.42.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.43.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.44.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.45.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.46.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.47.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.48.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.49.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.50.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.51.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.52.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.53.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.54.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.55.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.56.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.57.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.58.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.59.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.60.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     },
+     "model.layers.61.block_sparse_moe.gate": {
+       "group_size": 64,
+       "bits": 8
+     }
+   },
+   "rms_norm_eps": 1e-06,
+   "rope_theta": 5000000,
+   "rotary_dim": 64,
+   "scoring_func": "sigmoid",
+   "shared_intermediate_size": 0,
+   "tie_word_embeddings": false,
+   "transformers_version": "4.46.1",
+   "use_cache": true,
+   "use_mtp": true,
+   "use_qk_norm": true,
+   "use_routing_bias": true,
+   "vocab_size": 200064
+ }
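
For context, the "quantization" block above records MLX-style affine quantization: 6-bit weights with group size 64, with each layer's MoE router gate (block_sparse_moe.gate) kept at 8 bits. Below is a minimal sketch of how a checkpoint with this config is typically loaded and run, assuming a recent mlx-lm install that recognizes the "minimax_m2" model type; the repository path is a placeholder, not this repo's actual id.

# Sketch only: assumes mlx-lm is installed and supports model_type "minimax_m2".
from mlx_lm import load, generate

# load() reads config.json, including the "quantization" section above,
# and rebuilds the 6-bit affine-quantized weights (8-bit router gates).
model, tokenizer = load("path/to/MiniMax-M2-6bit-mlx")  # placeholder path

# Generate a short completion to confirm the model loads and runs.
print(generate(model, tokenizer, prompt="Hello", max_tokens=64))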