Osaurus-AI committed on
Commit
932e37a
·
verified ·
1 Parent(s): 5776428

config.json: per-module quant overrides + rope_parameters (metadata bug fix)

Browse files
Files changed (1) hide show
  1. config.json +1261 -3
config.json CHANGED
@@ -99,7 +99,1265 @@
99
  "vocab_size": 200064,
100
  "_name_or_path": "MiniMax-M2.7-Small",
101
  "quantization": {
102
- "group_size": 64,
103
- "bits": 2
104
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  }
 
99
  "vocab_size": 200064,
100
  "_name_or_path": "MiniMax-M2.7-Small",
101
  "quantization": {
102
+ "group_size": 32,
103
+ "bits": 8,
104
+ "mode": "affine",
105
+ "lm_head": {
106
+ "group_size": 64,
107
+ "bits": 8,
108
+ "mode": "affine"
109
+ },
110
+ "model.embed_tokens": {
111
+ "group_size": 64,
112
+ "bits": 8,
113
+ "mode": "affine"
114
+ },
115
+ "model.layers.0.self_attn.k_proj": {
116
+ "group_size": 64,
117
+ "bits": 8,
118
+ "mode": "affine"
119
+ },
120
+ "model.layers.0.self_attn.o_proj": {
121
+ "group_size": 64,
122
+ "bits": 8,
123
+ "mode": "affine"
124
+ },
125
+ "model.layers.0.self_attn.q_proj": {
126
+ "group_size": 64,
127
+ "bits": 8,
128
+ "mode": "affine"
129
+ },
130
+ "model.layers.0.self_attn.v_proj": {
131
+ "group_size": 64,
132
+ "bits": 8,
133
+ "mode": "affine"
134
+ },
135
+ "model.layers.1.self_attn.k_proj": {
136
+ "group_size": 64,
137
+ "bits": 8,
138
+ "mode": "affine"
139
+ },
140
+ "model.layers.1.self_attn.o_proj": {
141
+ "group_size": 64,
142
+ "bits": 8,
143
+ "mode": "affine"
144
+ },
145
+ "model.layers.1.self_attn.q_proj": {
146
+ "group_size": 64,
147
+ "bits": 8,
148
+ "mode": "affine"
149
+ },
150
+ "model.layers.1.self_attn.v_proj": {
151
+ "group_size": 64,
152
+ "bits": 8,
153
+ "mode": "affine"
154
+ },
155
+ "model.layers.10.self_attn.k_proj": {
156
+ "group_size": 64,
157
+ "bits": 8,
158
+ "mode": "affine"
159
+ },
160
+ "model.layers.10.self_attn.o_proj": {
161
+ "group_size": 64,
162
+ "bits": 8,
163
+ "mode": "affine"
164
+ },
165
+ "model.layers.10.self_attn.q_proj": {
166
+ "group_size": 64,
167
+ "bits": 8,
168
+ "mode": "affine"
169
+ },
170
+ "model.layers.10.self_attn.v_proj": {
171
+ "group_size": 64,
172
+ "bits": 8,
173
+ "mode": "affine"
174
+ },
175
+ "model.layers.11.self_attn.k_proj": {
176
+ "group_size": 64,
177
+ "bits": 8,
178
+ "mode": "affine"
179
+ },
180
+ "model.layers.11.self_attn.o_proj": {
181
+ "group_size": 64,
182
+ "bits": 8,
183
+ "mode": "affine"
184
+ },
185
+ "model.layers.11.self_attn.q_proj": {
186
+ "group_size": 64,
187
+ "bits": 8,
188
+ "mode": "affine"
189
+ },
190
+ "model.layers.11.self_attn.v_proj": {
191
+ "group_size": 64,
192
+ "bits": 8,
193
+ "mode": "affine"
194
+ },
195
+ "model.layers.12.self_attn.k_proj": {
196
+ "group_size": 64,
197
+ "bits": 8,
198
+ "mode": "affine"
199
+ },
200
+ "model.layers.12.self_attn.o_proj": {
201
+ "group_size": 64,
202
+ "bits": 8,
203
+ "mode": "affine"
204
+ },
205
+ "model.layers.12.self_attn.q_proj": {
206
+ "group_size": 64,
207
+ "bits": 8,
208
+ "mode": "affine"
209
+ },
210
+ "model.layers.12.self_attn.v_proj": {
211
+ "group_size": 64,
212
+ "bits": 8,
213
+ "mode": "affine"
214
+ },
215
+ "model.layers.13.self_attn.k_proj": {
216
+ "group_size": 64,
217
+ "bits": 8,
218
+ "mode": "affine"
219
+ },
220
+ "model.layers.13.self_attn.o_proj": {
221
+ "group_size": 64,
222
+ "bits": 8,
223
+ "mode": "affine"
224
+ },
225
+ "model.layers.13.self_attn.q_proj": {
226
+ "group_size": 64,
227
+ "bits": 8,
228
+ "mode": "affine"
229
+ },
230
+ "model.layers.13.self_attn.v_proj": {
231
+ "group_size": 64,
232
+ "bits": 8,
233
+ "mode": "affine"
234
+ },
235
+ "model.layers.14.self_attn.k_proj": {
236
+ "group_size": 64,
237
+ "bits": 8,
238
+ "mode": "affine"
239
+ },
240
+ "model.layers.14.self_attn.o_proj": {
241
+ "group_size": 64,
242
+ "bits": 8,
243
+ "mode": "affine"
244
+ },
245
+ "model.layers.14.self_attn.q_proj": {
246
+ "group_size": 64,
247
+ "bits": 8,
248
+ "mode": "affine"
249
+ },
250
+ "model.layers.14.self_attn.v_proj": {
251
+ "group_size": 64,
252
+ "bits": 8,
253
+ "mode": "affine"
254
+ },
255
+ "model.layers.15.self_attn.k_proj": {
256
+ "group_size": 64,
257
+ "bits": 8,
258
+ "mode": "affine"
259
+ },
260
+ "model.layers.15.self_attn.o_proj": {
261
+ "group_size": 64,
262
+ "bits": 8,
263
+ "mode": "affine"
264
+ },
265
+ "model.layers.15.self_attn.q_proj": {
266
+ "group_size": 64,
267
+ "bits": 8,
268
+ "mode": "affine"
269
+ },
270
+ "model.layers.15.self_attn.v_proj": {
271
+ "group_size": 64,
272
+ "bits": 8,
273
+ "mode": "affine"
274
+ },
275
+ "model.layers.16.self_attn.k_proj": {
276
+ "group_size": 64,
277
+ "bits": 8,
278
+ "mode": "affine"
279
+ },
280
+ "model.layers.16.self_attn.o_proj": {
281
+ "group_size": 64,
282
+ "bits": 8,
283
+ "mode": "affine"
284
+ },
285
+ "model.layers.16.self_attn.q_proj": {
286
+ "group_size": 64,
287
+ "bits": 8,
288
+ "mode": "affine"
289
+ },
290
+ "model.layers.16.self_attn.v_proj": {
291
+ "group_size": 64,
292
+ "bits": 8,
293
+ "mode": "affine"
294
+ },
295
+ "model.layers.17.self_attn.k_proj": {
296
+ "group_size": 64,
297
+ "bits": 8,
298
+ "mode": "affine"
299
+ },
300
+ "model.layers.17.self_attn.o_proj": {
301
+ "group_size": 64,
302
+ "bits": 8,
303
+ "mode": "affine"
304
+ },
305
+ "model.layers.17.self_attn.q_proj": {
306
+ "group_size": 64,
307
+ "bits": 8,
308
+ "mode": "affine"
309
+ },
310
+ "model.layers.17.self_attn.v_proj": {
311
+ "group_size": 64,
312
+ "bits": 8,
313
+ "mode": "affine"
314
+ },
315
+ "model.layers.18.self_attn.k_proj": {
316
+ "group_size": 64,
317
+ "bits": 8,
318
+ "mode": "affine"
319
+ },
320
+ "model.layers.18.self_attn.o_proj": {
321
+ "group_size": 64,
322
+ "bits": 8,
323
+ "mode": "affine"
324
+ },
325
+ "model.layers.18.self_attn.q_proj": {
326
+ "group_size": 64,
327
+ "bits": 8,
328
+ "mode": "affine"
329
+ },
330
+ "model.layers.18.self_attn.v_proj": {
331
+ "group_size": 64,
332
+ "bits": 8,
333
+ "mode": "affine"
334
+ },
335
+ "model.layers.19.self_attn.k_proj": {
336
+ "group_size": 64,
337
+ "bits": 8,
338
+ "mode": "affine"
339
+ },
340
+ "model.layers.19.self_attn.o_proj": {
341
+ "group_size": 64,
342
+ "bits": 8,
343
+ "mode": "affine"
344
+ },
345
+ "model.layers.19.self_attn.q_proj": {
346
+ "group_size": 64,
347
+ "bits": 8,
348
+ "mode": "affine"
349
+ },
350
+ "model.layers.19.self_attn.v_proj": {
351
+ "group_size": 64,
352
+ "bits": 8,
353
+ "mode": "affine"
354
+ },
355
+ "model.layers.2.self_attn.k_proj": {
356
+ "group_size": 64,
357
+ "bits": 8,
358
+ "mode": "affine"
359
+ },
360
+ "model.layers.2.self_attn.o_proj": {
361
+ "group_size": 64,
362
+ "bits": 8,
363
+ "mode": "affine"
364
+ },
365
+ "model.layers.2.self_attn.q_proj": {
366
+ "group_size": 64,
367
+ "bits": 8,
368
+ "mode": "affine"
369
+ },
370
+ "model.layers.2.self_attn.v_proj": {
371
+ "group_size": 64,
372
+ "bits": 8,
373
+ "mode": "affine"
374
+ },
375
+ "model.layers.20.self_attn.k_proj": {
376
+ "group_size": 64,
377
+ "bits": 8,
378
+ "mode": "affine"
379
+ },
380
+ "model.layers.20.self_attn.o_proj": {
381
+ "group_size": 64,
382
+ "bits": 8,
383
+ "mode": "affine"
384
+ },
385
+ "model.layers.20.self_attn.q_proj": {
386
+ "group_size": 64,
387
+ "bits": 8,
388
+ "mode": "affine"
389
+ },
390
+ "model.layers.20.self_attn.v_proj": {
391
+ "group_size": 64,
392
+ "bits": 8,
393
+ "mode": "affine"
394
+ },
395
+ "model.layers.21.self_attn.k_proj": {
396
+ "group_size": 64,
397
+ "bits": 8,
398
+ "mode": "affine"
399
+ },
400
+ "model.layers.21.self_attn.o_proj": {
401
+ "group_size": 64,
402
+ "bits": 8,
403
+ "mode": "affine"
404
+ },
405
+ "model.layers.21.self_attn.q_proj": {
406
+ "group_size": 64,
407
+ "bits": 8,
408
+ "mode": "affine"
409
+ },
410
+ "model.layers.21.self_attn.v_proj": {
411
+ "group_size": 64,
412
+ "bits": 8,
413
+ "mode": "affine"
414
+ },
415
+ "model.layers.22.self_attn.k_proj": {
416
+ "group_size": 64,
417
+ "bits": 8,
418
+ "mode": "affine"
419
+ },
420
+ "model.layers.22.self_attn.o_proj": {
421
+ "group_size": 64,
422
+ "bits": 8,
423
+ "mode": "affine"
424
+ },
425
+ "model.layers.22.self_attn.q_proj": {
426
+ "group_size": 64,
427
+ "bits": 8,
428
+ "mode": "affine"
429
+ },
430
+ "model.layers.22.self_attn.v_proj": {
431
+ "group_size": 64,
432
+ "bits": 8,
433
+ "mode": "affine"
434
+ },
435
+ "model.layers.23.self_attn.k_proj": {
436
+ "group_size": 64,
437
+ "bits": 8,
438
+ "mode": "affine"
439
+ },
440
+ "model.layers.23.self_attn.o_proj": {
441
+ "group_size": 64,
442
+ "bits": 8,
443
+ "mode": "affine"
444
+ },
445
+ "model.layers.23.self_attn.q_proj": {
446
+ "group_size": 64,
447
+ "bits": 8,
448
+ "mode": "affine"
449
+ },
450
+ "model.layers.23.self_attn.v_proj": {
451
+ "group_size": 64,
452
+ "bits": 8,
453
+ "mode": "affine"
454
+ },
455
+ "model.layers.24.self_attn.k_proj": {
456
+ "group_size": 64,
457
+ "bits": 8,
458
+ "mode": "affine"
459
+ },
460
+ "model.layers.24.self_attn.o_proj": {
461
+ "group_size": 64,
462
+ "bits": 8,
463
+ "mode": "affine"
464
+ },
465
+ "model.layers.24.self_attn.q_proj": {
466
+ "group_size": 64,
467
+ "bits": 8,
468
+ "mode": "affine"
469
+ },
470
+ "model.layers.24.self_attn.v_proj": {
471
+ "group_size": 64,
472
+ "bits": 8,
473
+ "mode": "affine"
474
+ },
475
+ "model.layers.25.self_attn.k_proj": {
476
+ "group_size": 64,
477
+ "bits": 8,
478
+ "mode": "affine"
479
+ },
480
+ "model.layers.25.self_attn.o_proj": {
481
+ "group_size": 64,
482
+ "bits": 8,
483
+ "mode": "affine"
484
+ },
485
+ "model.layers.25.self_attn.q_proj": {
486
+ "group_size": 64,
487
+ "bits": 8,
488
+ "mode": "affine"
489
+ },
490
+ "model.layers.25.self_attn.v_proj": {
491
+ "group_size": 64,
492
+ "bits": 8,
493
+ "mode": "affine"
494
+ },
495
+ "model.layers.26.self_attn.k_proj": {
496
+ "group_size": 64,
497
+ "bits": 8,
498
+ "mode": "affine"
499
+ },
500
+ "model.layers.26.self_attn.o_proj": {
501
+ "group_size": 64,
502
+ "bits": 8,
503
+ "mode": "affine"
504
+ },
505
+ "model.layers.26.self_attn.q_proj": {
506
+ "group_size": 64,
507
+ "bits": 8,
508
+ "mode": "affine"
509
+ },
510
+ "model.layers.26.self_attn.v_proj": {
511
+ "group_size": 64,
512
+ "bits": 8,
513
+ "mode": "affine"
514
+ },
515
+ "model.layers.27.self_attn.k_proj": {
516
+ "group_size": 64,
517
+ "bits": 8,
518
+ "mode": "affine"
519
+ },
520
+ "model.layers.27.self_attn.o_proj": {
521
+ "group_size": 64,
522
+ "bits": 8,
523
+ "mode": "affine"
524
+ },
525
+ "model.layers.27.self_attn.q_proj": {
526
+ "group_size": 64,
527
+ "bits": 8,
528
+ "mode": "affine"
529
+ },
530
+ "model.layers.27.self_attn.v_proj": {
531
+ "group_size": 64,
532
+ "bits": 8,
533
+ "mode": "affine"
534
+ },
535
+ "model.layers.28.self_attn.k_proj": {
536
+ "group_size": 64,
537
+ "bits": 8,
538
+ "mode": "affine"
539
+ },
540
+ "model.layers.28.self_attn.o_proj": {
541
+ "group_size": 64,
542
+ "bits": 8,
543
+ "mode": "affine"
544
+ },
545
+ "model.layers.28.self_attn.q_proj": {
546
+ "group_size": 64,
547
+ "bits": 8,
548
+ "mode": "affine"
549
+ },
550
+ "model.layers.28.self_attn.v_proj": {
551
+ "group_size": 64,
552
+ "bits": 8,
553
+ "mode": "affine"
554
+ },
555
+ "model.layers.29.self_attn.k_proj": {
556
+ "group_size": 64,
557
+ "bits": 8,
558
+ "mode": "affine"
559
+ },
560
+ "model.layers.29.self_attn.o_proj": {
561
+ "group_size": 64,
562
+ "bits": 8,
563
+ "mode": "affine"
564
+ },
565
+ "model.layers.29.self_attn.q_proj": {
566
+ "group_size": 64,
567
+ "bits": 8,
568
+ "mode": "affine"
569
+ },
570
+ "model.layers.29.self_attn.v_proj": {
571
+ "group_size": 64,
572
+ "bits": 8,
573
+ "mode": "affine"
574
+ },
575
+ "model.layers.3.self_attn.k_proj": {
576
+ "group_size": 64,
577
+ "bits": 8,
578
+ "mode": "affine"
579
+ },
580
+ "model.layers.3.self_attn.o_proj": {
581
+ "group_size": 64,
582
+ "bits": 8,
583
+ "mode": "affine"
584
+ },
585
+ "model.layers.3.self_attn.q_proj": {
586
+ "group_size": 64,
587
+ "bits": 8,
588
+ "mode": "affine"
589
+ },
590
+ "model.layers.3.self_attn.v_proj": {
591
+ "group_size": 64,
592
+ "bits": 8,
593
+ "mode": "affine"
594
+ },
595
+ "model.layers.30.self_attn.k_proj": {
596
+ "group_size": 64,
597
+ "bits": 8,
598
+ "mode": "affine"
599
+ },
600
+ "model.layers.30.self_attn.o_proj": {
601
+ "group_size": 64,
602
+ "bits": 8,
603
+ "mode": "affine"
604
+ },
605
+ "model.layers.30.self_attn.q_proj": {
606
+ "group_size": 64,
607
+ "bits": 8,
608
+ "mode": "affine"
609
+ },
610
+ "model.layers.30.self_attn.v_proj": {
611
+ "group_size": 64,
612
+ "bits": 8,
613
+ "mode": "affine"
614
+ },
615
+ "model.layers.31.self_attn.k_proj": {
616
+ "group_size": 64,
617
+ "bits": 8,
618
+ "mode": "affine"
619
+ },
620
+ "model.layers.31.self_attn.o_proj": {
621
+ "group_size": 64,
622
+ "bits": 8,
623
+ "mode": "affine"
624
+ },
625
+ "model.layers.31.self_attn.q_proj": {
626
+ "group_size": 64,
627
+ "bits": 8,
628
+ "mode": "affine"
629
+ },
630
+ "model.layers.31.self_attn.v_proj": {
631
+ "group_size": 64,
632
+ "bits": 8,
633
+ "mode": "affine"
634
+ },
635
+ "model.layers.32.self_attn.k_proj": {
636
+ "group_size": 64,
637
+ "bits": 8,
638
+ "mode": "affine"
639
+ },
640
+ "model.layers.32.self_attn.o_proj": {
641
+ "group_size": 64,
642
+ "bits": 8,
643
+ "mode": "affine"
644
+ },
645
+ "model.layers.32.self_attn.q_proj": {
646
+ "group_size": 64,
647
+ "bits": 8,
648
+ "mode": "affine"
649
+ },
650
+ "model.layers.32.self_attn.v_proj": {
651
+ "group_size": 64,
652
+ "bits": 8,
653
+ "mode": "affine"
654
+ },
655
+ "model.layers.33.self_attn.k_proj": {
656
+ "group_size": 64,
657
+ "bits": 8,
658
+ "mode": "affine"
659
+ },
660
+ "model.layers.33.self_attn.o_proj": {
661
+ "group_size": 64,
662
+ "bits": 8,
663
+ "mode": "affine"
664
+ },
665
+ "model.layers.33.self_attn.q_proj": {
666
+ "group_size": 64,
667
+ "bits": 8,
668
+ "mode": "affine"
669
+ },
670
+ "model.layers.33.self_attn.v_proj": {
671
+ "group_size": 64,
672
+ "bits": 8,
673
+ "mode": "affine"
674
+ },
675
+ "model.layers.34.self_attn.k_proj": {
676
+ "group_size": 64,
677
+ "bits": 8,
678
+ "mode": "affine"
679
+ },
680
+ "model.layers.34.self_attn.o_proj": {
681
+ "group_size": 64,
682
+ "bits": 8,
683
+ "mode": "affine"
684
+ },
685
+ "model.layers.34.self_attn.q_proj": {
686
+ "group_size": 64,
687
+ "bits": 8,
688
+ "mode": "affine"
689
+ },
690
+ "model.layers.34.self_attn.v_proj": {
691
+ "group_size": 64,
692
+ "bits": 8,
693
+ "mode": "affine"
694
+ },
695
+ "model.layers.35.self_attn.k_proj": {
696
+ "group_size": 64,
697
+ "bits": 8,
698
+ "mode": "affine"
699
+ },
700
+ "model.layers.35.self_attn.o_proj": {
701
+ "group_size": 64,
702
+ "bits": 8,
703
+ "mode": "affine"
704
+ },
705
+ "model.layers.35.self_attn.q_proj": {
706
+ "group_size": 64,
707
+ "bits": 8,
708
+ "mode": "affine"
709
+ },
710
+ "model.layers.35.self_attn.v_proj": {
711
+ "group_size": 64,
712
+ "bits": 8,
713
+ "mode": "affine"
714
+ },
715
+ "model.layers.36.self_attn.k_proj": {
716
+ "group_size": 64,
717
+ "bits": 8,
718
+ "mode": "affine"
719
+ },
720
+ "model.layers.36.self_attn.o_proj": {
721
+ "group_size": 64,
722
+ "bits": 8,
723
+ "mode": "affine"
724
+ },
725
+ "model.layers.36.self_attn.q_proj": {
726
+ "group_size": 64,
727
+ "bits": 8,
728
+ "mode": "affine"
729
+ },
730
+ "model.layers.36.self_attn.v_proj": {
731
+ "group_size": 64,
732
+ "bits": 8,
733
+ "mode": "affine"
734
+ },
735
+ "model.layers.37.self_attn.k_proj": {
736
+ "group_size": 64,
737
+ "bits": 8,
738
+ "mode": "affine"
739
+ },
740
+ "model.layers.37.self_attn.o_proj": {
741
+ "group_size": 64,
742
+ "bits": 8,
743
+ "mode": "affine"
744
+ },
745
+ "model.layers.37.self_attn.q_proj": {
746
+ "group_size": 64,
747
+ "bits": 8,
748
+ "mode": "affine"
749
+ },
750
+ "model.layers.37.self_attn.v_proj": {
751
+ "group_size": 64,
752
+ "bits": 8,
753
+ "mode": "affine"
754
+ },
755
+ "model.layers.38.self_attn.k_proj": {
756
+ "group_size": 64,
757
+ "bits": 8,
758
+ "mode": "affine"
759
+ },
760
+ "model.layers.38.self_attn.o_proj": {
761
+ "group_size": 64,
762
+ "bits": 8,
763
+ "mode": "affine"
764
+ },
765
+ "model.layers.38.self_attn.q_proj": {
766
+ "group_size": 64,
767
+ "bits": 8,
768
+ "mode": "affine"
769
+ },
770
+ "model.layers.38.self_attn.v_proj": {
771
+ "group_size": 64,
772
+ "bits": 8,
773
+ "mode": "affine"
774
+ },
775
+ "model.layers.39.self_attn.k_proj": {
776
+ "group_size": 64,
777
+ "bits": 8,
778
+ "mode": "affine"
779
+ },
780
+ "model.layers.39.self_attn.o_proj": {
781
+ "group_size": 64,
782
+ "bits": 8,
783
+ "mode": "affine"
784
+ },
785
+ "model.layers.39.self_attn.q_proj": {
786
+ "group_size": 64,
787
+ "bits": 8,
788
+ "mode": "affine"
789
+ },
790
+ "model.layers.39.self_attn.v_proj": {
791
+ "group_size": 64,
792
+ "bits": 8,
793
+ "mode": "affine"
794
+ },
795
+ "model.layers.4.self_attn.k_proj": {
796
+ "group_size": 64,
797
+ "bits": 8,
798
+ "mode": "affine"
799
+ },
800
+ "model.layers.4.self_attn.o_proj": {
801
+ "group_size": 64,
802
+ "bits": 8,
803
+ "mode": "affine"
804
+ },
805
+ "model.layers.4.self_attn.q_proj": {
806
+ "group_size": 64,
807
+ "bits": 8,
808
+ "mode": "affine"
809
+ },
810
+ "model.layers.4.self_attn.v_proj": {
811
+ "group_size": 64,
812
+ "bits": 8,
813
+ "mode": "affine"
814
+ },
815
+ "model.layers.40.self_attn.k_proj": {
816
+ "group_size": 64,
817
+ "bits": 8,
818
+ "mode": "affine"
819
+ },
820
+ "model.layers.40.self_attn.o_proj": {
821
+ "group_size": 64,
822
+ "bits": 8,
823
+ "mode": "affine"
824
+ },
825
+ "model.layers.40.self_attn.q_proj": {
826
+ "group_size": 64,
827
+ "bits": 8,
828
+ "mode": "affine"
829
+ },
830
+ "model.layers.40.self_attn.v_proj": {
831
+ "group_size": 64,
832
+ "bits": 8,
833
+ "mode": "affine"
834
+ },
835
+ "model.layers.41.self_attn.k_proj": {
836
+ "group_size": 64,
837
+ "bits": 8,
838
+ "mode": "affine"
839
+ },
840
+ "model.layers.41.self_attn.o_proj": {
841
+ "group_size": 64,
842
+ "bits": 8,
843
+ "mode": "affine"
844
+ },
845
+ "model.layers.41.self_attn.q_proj": {
846
+ "group_size": 64,
847
+ "bits": 8,
848
+ "mode": "affine"
849
+ },
850
+ "model.layers.41.self_attn.v_proj": {
851
+ "group_size": 64,
852
+ "bits": 8,
853
+ "mode": "affine"
854
+ },
855
+ "model.layers.42.self_attn.k_proj": {
856
+ "group_size": 64,
857
+ "bits": 8,
858
+ "mode": "affine"
859
+ },
860
+ "model.layers.42.self_attn.o_proj": {
861
+ "group_size": 64,
862
+ "bits": 8,
863
+ "mode": "affine"
864
+ },
865
+ "model.layers.42.self_attn.q_proj": {
866
+ "group_size": 64,
867
+ "bits": 8,
868
+ "mode": "affine"
869
+ },
870
+ "model.layers.42.self_attn.v_proj": {
871
+ "group_size": 64,
872
+ "bits": 8,
873
+ "mode": "affine"
874
+ },
875
+ "model.layers.43.self_attn.k_proj": {
876
+ "group_size": 64,
877
+ "bits": 8,
878
+ "mode": "affine"
879
+ },
880
+ "model.layers.43.self_attn.o_proj": {
881
+ "group_size": 64,
882
+ "bits": 8,
883
+ "mode": "affine"
884
+ },
885
+ "model.layers.43.self_attn.q_proj": {
886
+ "group_size": 64,
887
+ "bits": 8,
888
+ "mode": "affine"
889
+ },
890
+ "model.layers.43.self_attn.v_proj": {
891
+ "group_size": 64,
892
+ "bits": 8,
893
+ "mode": "affine"
894
+ },
895
+ "model.layers.44.self_attn.k_proj": {
896
+ "group_size": 64,
897
+ "bits": 8,
898
+ "mode": "affine"
899
+ },
900
+ "model.layers.44.self_attn.o_proj": {
901
+ "group_size": 64,
902
+ "bits": 8,
903
+ "mode": "affine"
904
+ },
905
+ "model.layers.44.self_attn.q_proj": {
906
+ "group_size": 64,
907
+ "bits": 8,
908
+ "mode": "affine"
909
+ },
910
+ "model.layers.44.self_attn.v_proj": {
911
+ "group_size": 64,
912
+ "bits": 8,
913
+ "mode": "affine"
914
+ },
915
+ "model.layers.45.self_attn.k_proj": {
916
+ "group_size": 64,
917
+ "bits": 8,
918
+ "mode": "affine"
919
+ },
920
+ "model.layers.45.self_attn.o_proj": {
921
+ "group_size": 64,
922
+ "bits": 8,
923
+ "mode": "affine"
924
+ },
925
+ "model.layers.45.self_attn.q_proj": {
926
+ "group_size": 64,
927
+ "bits": 8,
928
+ "mode": "affine"
929
+ },
930
+ "model.layers.45.self_attn.v_proj": {
931
+ "group_size": 64,
932
+ "bits": 8,
933
+ "mode": "affine"
934
+ },
935
+ "model.layers.46.self_attn.k_proj": {
936
+ "group_size": 64,
937
+ "bits": 8,
938
+ "mode": "affine"
939
+ },
940
+ "model.layers.46.self_attn.o_proj": {
941
+ "group_size": 64,
942
+ "bits": 8,
943
+ "mode": "affine"
944
+ },
945
+ "model.layers.46.self_attn.q_proj": {
946
+ "group_size": 64,
947
+ "bits": 8,
948
+ "mode": "affine"
949
+ },
950
+ "model.layers.46.self_attn.v_proj": {
951
+ "group_size": 64,
952
+ "bits": 8,
953
+ "mode": "affine"
954
+ },
955
+ "model.layers.47.self_attn.k_proj": {
956
+ "group_size": 64,
957
+ "bits": 8,
958
+ "mode": "affine"
959
+ },
960
+ "model.layers.47.self_attn.o_proj": {
961
+ "group_size": 64,
962
+ "bits": 8,
963
+ "mode": "affine"
964
+ },
965
+ "model.layers.47.self_attn.q_proj": {
966
+ "group_size": 64,
967
+ "bits": 8,
968
+ "mode": "affine"
969
+ },
970
+ "model.layers.47.self_attn.v_proj": {
971
+ "group_size": 64,
972
+ "bits": 8,
973
+ "mode": "affine"
974
+ },
975
+ "model.layers.48.self_attn.k_proj": {
976
+ "group_size": 64,
977
+ "bits": 8,
978
+ "mode": "affine"
979
+ },
980
+ "model.layers.48.self_attn.o_proj": {
981
+ "group_size": 64,
982
+ "bits": 8,
983
+ "mode": "affine"
984
+ },
985
+ "model.layers.48.self_attn.q_proj": {
986
+ "group_size": 64,
987
+ "bits": 8,
988
+ "mode": "affine"
989
+ },
990
+ "model.layers.48.self_attn.v_proj": {
991
+ "group_size": 64,
992
+ "bits": 8,
993
+ "mode": "affine"
994
+ },
995
+ "model.layers.49.self_attn.k_proj": {
996
+ "group_size": 64,
997
+ "bits": 8,
998
+ "mode": "affine"
999
+ },
1000
+ "model.layers.49.self_attn.o_proj": {
1001
+ "group_size": 64,
1002
+ "bits": 8,
1003
+ "mode": "affine"
1004
+ },
1005
+ "model.layers.49.self_attn.q_proj": {
1006
+ "group_size": 64,
1007
+ "bits": 8,
1008
+ "mode": "affine"
1009
+ },
1010
+ "model.layers.49.self_attn.v_proj": {
1011
+ "group_size": 64,
1012
+ "bits": 8,
1013
+ "mode": "affine"
1014
+ },
1015
+ "model.layers.5.self_attn.k_proj": {
1016
+ "group_size": 64,
1017
+ "bits": 8,
1018
+ "mode": "affine"
1019
+ },
1020
+ "model.layers.5.self_attn.o_proj": {
1021
+ "group_size": 64,
1022
+ "bits": 8,
1023
+ "mode": "affine"
1024
+ },
1025
+ "model.layers.5.self_attn.q_proj": {
1026
+ "group_size": 64,
1027
+ "bits": 8,
1028
+ "mode": "affine"
1029
+ },
1030
+ "model.layers.5.self_attn.v_proj": {
1031
+ "group_size": 64,
1032
+ "bits": 8,
1033
+ "mode": "affine"
1034
+ },
1035
+ "model.layers.50.self_attn.k_proj": {
1036
+ "group_size": 64,
1037
+ "bits": 8,
1038
+ "mode": "affine"
1039
+ },
1040
+ "model.layers.50.self_attn.o_proj": {
1041
+ "group_size": 64,
1042
+ "bits": 8,
1043
+ "mode": "affine"
1044
+ },
1045
+ "model.layers.50.self_attn.q_proj": {
1046
+ "group_size": 64,
1047
+ "bits": 8,
1048
+ "mode": "affine"
1049
+ },
1050
+ "model.layers.50.self_attn.v_proj": {
1051
+ "group_size": 64,
1052
+ "bits": 8,
1053
+ "mode": "affine"
1054
+ },
1055
+ "model.layers.51.self_attn.k_proj": {
1056
+ "group_size": 64,
1057
+ "bits": 8,
1058
+ "mode": "affine"
1059
+ },
1060
+ "model.layers.51.self_attn.o_proj": {
1061
+ "group_size": 64,
1062
+ "bits": 8,
1063
+ "mode": "affine"
1064
+ },
1065
+ "model.layers.51.self_attn.q_proj": {
1066
+ "group_size": 64,
1067
+ "bits": 8,
1068
+ "mode": "affine"
1069
+ },
1070
+ "model.layers.51.self_attn.v_proj": {
1071
+ "group_size": 64,
1072
+ "bits": 8,
1073
+ "mode": "affine"
1074
+ },
1075
+ "model.layers.52.self_attn.k_proj": {
1076
+ "group_size": 64,
1077
+ "bits": 8,
1078
+ "mode": "affine"
1079
+ },
1080
+ "model.layers.52.self_attn.o_proj": {
1081
+ "group_size": 64,
1082
+ "bits": 8,
1083
+ "mode": "affine"
1084
+ },
1085
+ "model.layers.52.self_attn.q_proj": {
1086
+ "group_size": 64,
1087
+ "bits": 8,
1088
+ "mode": "affine"
1089
+ },
1090
+ "model.layers.52.self_attn.v_proj": {
1091
+ "group_size": 64,
1092
+ "bits": 8,
1093
+ "mode": "affine"
1094
+ },
1095
+ "model.layers.53.self_attn.k_proj": {
1096
+ "group_size": 64,
1097
+ "bits": 8,
1098
+ "mode": "affine"
1099
+ },
1100
+ "model.layers.53.self_attn.o_proj": {
1101
+ "group_size": 64,
1102
+ "bits": 8,
1103
+ "mode": "affine"
1104
+ },
1105
+ "model.layers.53.self_attn.q_proj": {
1106
+ "group_size": 64,
1107
+ "bits": 8,
1108
+ "mode": "affine"
1109
+ },
1110
+ "model.layers.53.self_attn.v_proj": {
1111
+ "group_size": 64,
1112
+ "bits": 8,
1113
+ "mode": "affine"
1114
+ },
1115
+ "model.layers.54.self_attn.k_proj": {
1116
+ "group_size": 64,
1117
+ "bits": 8,
1118
+ "mode": "affine"
1119
+ },
1120
+ "model.layers.54.self_attn.o_proj": {
1121
+ "group_size": 64,
1122
+ "bits": 8,
1123
+ "mode": "affine"
1124
+ },
1125
+ "model.layers.54.self_attn.q_proj": {
1126
+ "group_size": 64,
1127
+ "bits": 8,
1128
+ "mode": "affine"
1129
+ },
1130
+ "model.layers.54.self_attn.v_proj": {
1131
+ "group_size": 64,
1132
+ "bits": 8,
1133
+ "mode": "affine"
1134
+ },
1135
+ "model.layers.55.self_attn.k_proj": {
1136
+ "group_size": 64,
1137
+ "bits": 8,
1138
+ "mode": "affine"
1139
+ },
1140
+ "model.layers.55.self_attn.o_proj": {
1141
+ "group_size": 64,
1142
+ "bits": 8,
1143
+ "mode": "affine"
1144
+ },
1145
+ "model.layers.55.self_attn.q_proj": {
1146
+ "group_size": 64,
1147
+ "bits": 8,
1148
+ "mode": "affine"
1149
+ },
1150
+ "model.layers.55.self_attn.v_proj": {
1151
+ "group_size": 64,
1152
+ "bits": 8,
1153
+ "mode": "affine"
1154
+ },
1155
+ "model.layers.56.self_attn.k_proj": {
1156
+ "group_size": 64,
1157
+ "bits": 8,
1158
+ "mode": "affine"
1159
+ },
1160
+ "model.layers.56.self_attn.o_proj": {
1161
+ "group_size": 64,
1162
+ "bits": 8,
1163
+ "mode": "affine"
1164
+ },
1165
+ "model.layers.56.self_attn.q_proj": {
1166
+ "group_size": 64,
1167
+ "bits": 8,
1168
+ "mode": "affine"
1169
+ },
1170
+ "model.layers.56.self_attn.v_proj": {
1171
+ "group_size": 64,
1172
+ "bits": 8,
1173
+ "mode": "affine"
1174
+ },
1175
+ "model.layers.57.self_attn.k_proj": {
1176
+ "group_size": 64,
1177
+ "bits": 8,
1178
+ "mode": "affine"
1179
+ },
1180
+ "model.layers.57.self_attn.o_proj": {
1181
+ "group_size": 64,
1182
+ "bits": 8,
1183
+ "mode": "affine"
1184
+ },
1185
+ "model.layers.57.self_attn.q_proj": {
1186
+ "group_size": 64,
1187
+ "bits": 8,
1188
+ "mode": "affine"
1189
+ },
1190
+ "model.layers.57.self_attn.v_proj": {
1191
+ "group_size": 64,
1192
+ "bits": 8,
1193
+ "mode": "affine"
1194
+ },
1195
+ "model.layers.58.self_attn.k_proj": {
1196
+ "group_size": 64,
1197
+ "bits": 8,
1198
+ "mode": "affine"
1199
+ },
1200
+ "model.layers.58.self_attn.o_proj": {
1201
+ "group_size": 64,
1202
+ "bits": 8,
1203
+ "mode": "affine"
1204
+ },
1205
+ "model.layers.58.self_attn.q_proj": {
1206
+ "group_size": 64,
1207
+ "bits": 8,
1208
+ "mode": "affine"
1209
+ },
1210
+ "model.layers.58.self_attn.v_proj": {
1211
+ "group_size": 64,
1212
+ "bits": 8,
1213
+ "mode": "affine"
1214
+ },
1215
+ "model.layers.59.self_attn.k_proj": {
1216
+ "group_size": 64,
1217
+ "bits": 8,
1218
+ "mode": "affine"
1219
+ },
1220
+ "model.layers.59.self_attn.o_proj": {
1221
+ "group_size": 64,
1222
+ "bits": 8,
1223
+ "mode": "affine"
1224
+ },
1225
+ "model.layers.59.self_attn.q_proj": {
1226
+ "group_size": 64,
1227
+ "bits": 8,
1228
+ "mode": "affine"
1229
+ },
1230
+ "model.layers.59.self_attn.v_proj": {
1231
+ "group_size": 64,
1232
+ "bits": 8,
1233
+ "mode": "affine"
1234
+ },
1235
+ "model.layers.6.self_attn.k_proj": {
1236
+ "group_size": 64,
1237
+ "bits": 8,
1238
+ "mode": "affine"
1239
+ },
1240
+ "model.layers.6.self_attn.o_proj": {
1241
+ "group_size": 64,
1242
+ "bits": 8,
1243
+ "mode": "affine"
1244
+ },
1245
+ "model.layers.6.self_attn.q_proj": {
1246
+ "group_size": 64,
1247
+ "bits": 8,
1248
+ "mode": "affine"
1249
+ },
1250
+ "model.layers.6.self_attn.v_proj": {
1251
+ "group_size": 64,
1252
+ "bits": 8,
1253
+ "mode": "affine"
1254
+ },
1255
+ "model.layers.60.self_attn.k_proj": {
1256
+ "group_size": 64,
1257
+ "bits": 8,
1258
+ "mode": "affine"
1259
+ },
1260
+ "model.layers.60.self_attn.o_proj": {
1261
+ "group_size": 64,
1262
+ "bits": 8,
1263
+ "mode": "affine"
1264
+ },
1265
+ "model.layers.60.self_attn.q_proj": {
1266
+ "group_size": 64,
1267
+ "bits": 8,
1268
+ "mode": "affine"
1269
+ },
1270
+ "model.layers.60.self_attn.v_proj": {
1271
+ "group_size": 64,
1272
+ "bits": 8,
1273
+ "mode": "affine"
1274
+ },
1275
+ "model.layers.61.self_attn.k_proj": {
1276
+ "group_size": 64,
1277
+ "bits": 8,
1278
+ "mode": "affine"
1279
+ },
1280
+ "model.layers.61.self_attn.o_proj": {
1281
+ "group_size": 64,
1282
+ "bits": 8,
1283
+ "mode": "affine"
1284
+ },
1285
+ "model.layers.61.self_attn.q_proj": {
1286
+ "group_size": 64,
1287
+ "bits": 8,
1288
+ "mode": "affine"
1289
+ },
1290
+ "model.layers.61.self_attn.v_proj": {
1291
+ "group_size": 64,
1292
+ "bits": 8,
1293
+ "mode": "affine"
1294
+ },
1295
+ "model.layers.7.self_attn.k_proj": {
1296
+ "group_size": 64,
1297
+ "bits": 8,
1298
+ "mode": "affine"
1299
+ },
1300
+ "model.layers.7.self_attn.o_proj": {
1301
+ "group_size": 64,
1302
+ "bits": 8,
1303
+ "mode": "affine"
1304
+ },
1305
+ "model.layers.7.self_attn.q_proj": {
1306
+ "group_size": 64,
1307
+ "bits": 8,
1308
+ "mode": "affine"
1309
+ },
1310
+ "model.layers.7.self_attn.v_proj": {
1311
+ "group_size": 64,
1312
+ "bits": 8,
1313
+ "mode": "affine"
1314
+ },
1315
+ "model.layers.8.self_attn.k_proj": {
1316
+ "group_size": 64,
1317
+ "bits": 8,
1318
+ "mode": "affine"
1319
+ },
1320
+ "model.layers.8.self_attn.o_proj": {
1321
+ "group_size": 64,
1322
+ "bits": 8,
1323
+ "mode": "affine"
1324
+ },
1325
+ "model.layers.8.self_attn.q_proj": {
1326
+ "group_size": 64,
1327
+ "bits": 8,
1328
+ "mode": "affine"
1329
+ },
1330
+ "model.layers.8.self_attn.v_proj": {
1331
+ "group_size": 64,
1332
+ "bits": 8,
1333
+ "mode": "affine"
1334
+ },
1335
+ "model.layers.9.self_attn.k_proj": {
1336
+ "group_size": 64,
1337
+ "bits": 8,
1338
+ "mode": "affine"
1339
+ },
1340
+ "model.layers.9.self_attn.o_proj": {
1341
+ "group_size": 64,
1342
+ "bits": 8,
1343
+ "mode": "affine"
1344
+ },
1345
+ "model.layers.9.self_attn.q_proj": {
1346
+ "group_size": 64,
1347
+ "bits": 8,
1348
+ "mode": "affine"
1349
+ },
1350
+ "model.layers.9.self_attn.v_proj": {
1351
+ "group_size": 64,
1352
+ "bits": 8,
1353
+ "mode": "affine"
1354
+ }
1355
+ },
1356
+ "rope_parameters": {
1357
+ "rope_type": "default",
1358
+ "rope_theta": 5000000.0
1359
+ },
1360
+ "routed_expert_bits": 2,
1361
+ "group_size": 32,
1362
+ "mxtq_seed": 42
1363
  }