bwshen-mi committed on
Commit
d38dfcb
·
verified ·
1 Parent(s): bc20170

Update MiMo-V2.5 config

Browse files

- enable_thinking = true by default in tokenizer_config.json
- update rope_theta and length-related params in config.json

Files changed (2) hide show
  1. config.json +367 -367
  2. tokenizer_config.json +1 -1
config.json CHANGED
@@ -1,367 +1,367 @@
1
- {
2
- "architectures": [
3
- "MiMoV2ForCausalLM"
4
- ],
5
- "auto_map": {
6
- "AutoConfig": "configuration_mimo_v2.MiMoV2Config",
7
- "AutoModel": "modeling_mimo_v2.MiMoV2Model",
8
- "AutoModelForCausalLM": "modeling_mimo_v2.MiMoV2ForCausalLM"
9
- },
10
- "attention_bias": false,
11
- "attention_chunk_size": 128,
12
- "attention_dropout": 0.0,
13
- "attention_value_scale": 0.707,
14
- "attention_projection_layout": "fused_qkv",
15
- "add_full_attention_sink_bias": false,
16
- "add_swa_attention_sink_bias": true,
17
- "audio_config": {
18
- "add_post_norm": true,
19
- "audio_channels": 20,
20
- "audio_segment_size": 6000,
21
- "group_size": 4,
22
- "input_full_attention": true,
23
- "input_local_attn_heads": 16,
24
- "input_local_dim": 1024,
25
- "input_local_head_dim": 64,
26
- "input_local_hidden_dropout": 0.0,
27
- "input_local_intermediate_size": 4096,
28
- "input_local_layers": 6,
29
- "out_hidden_size": 4096,
30
- "partial_rotary_factor": 1.0,
31
- "projection_layers": 2,
32
- "rope_theta": 640000,
33
- "speech_vocab_size": "1280",
34
- "speech_zeroemb_idx": "1024"
35
- },
36
- "swa_num_key_value_heads": 8,
37
- "swa_num_attention_heads": 64,
38
- "swa_head_dim": 192,
39
- "swa_v_head_dim": 128,
40
- "dtype": "bfloat16",
41
- "eos_token_id": 151645,
42
- "head_dim": 192,
43
- "hidden_act": "silu",
44
- "hidden_size": 4096,
45
- "hybrid_block_size": null,
46
- "hybrid_layer_pattern": [
47
- 0,
48
- 1,
49
- 1,
50
- 1,
51
- 1,
52
- 0,
53
- 1,
54
- 1,
55
- 1,
56
- 1,
57
- 1,
58
- 0,
59
- 1,
60
- 1,
61
- 1,
62
- 1,
63
- 1,
64
- 0,
65
- 1,
66
- 1,
67
- 1,
68
- 1,
69
- 1,
70
- 0,
71
- 1,
72
- 1,
73
- 1,
74
- 1,
75
- 1,
76
- 0,
77
- 1,
78
- 1,
79
- 1,
80
- 1,
81
- 1,
82
- 0,
83
- 1,
84
- 1,
85
- 1,
86
- 1,
87
- 1,
88
- 0,
89
- 1,
90
- 1,
91
- 1,
92
- 1,
93
- 1,
94
- 0
95
- ],
96
- "image_token_id": 151655,
97
- "initializer_range": 0.02,
98
- "intermediate_size": 16384,
99
- "layernorm_epsilon": 1e-05,
100
- "max_position_embeddings": 262144,
101
- "model_type": "mimo_v2",
102
- "moe_intermediate_size": 2048,
103
- "moe_layer_freq": [
104
- 0,
105
- 1,
106
- 1,
107
- 1,
108
- 1,
109
- 1,
110
- 1,
111
- 1,
112
- 1,
113
- 1,
114
- 1,
115
- 1,
116
- 1,
117
- 1,
118
- 1,
119
- 1,
120
- 1,
121
- 1,
122
- 1,
123
- 1,
124
- 1,
125
- 1,
126
- 1,
127
- 1,
128
- 1,
129
- 1,
130
- 1,
131
- 1,
132
- 1,
133
- 1,
134
- 1,
135
- 1,
136
- 1,
137
- 1,
138
- 1,
139
- 1,
140
- 1,
141
- 1,
142
- 1,
143
- 1,
144
- 1,
145
- 1,
146
- 1,
147
- 1,
148
- 1,
149
- 1,
150
- 1,
151
- 1
152
- ],
153
- "n_group": 1,
154
- "n_routed_experts": 256,
155
- "n_shared_experts": null,
156
- "norm_topk_prob": true,
157
- "num_attention_heads": 64,
158
- "num_experts_per_tok": 8,
159
- "num_hidden_layers": 48,
160
- "num_key_value_heads": 4,
161
- "pad_token_id": 151643,
162
- "partial_rotary_factor": 0.334,
163
- "processor_config": {
164
- "audio_avg_pooler": 2,
165
- "audio_channels": 20,
166
- "audio_end_token_id": 151674,
167
- "audio_fmax": null,
168
- "audio_fmin": 0,
169
- "audio_group_size": 4,
170
- "audio_hop_length": 240,
171
- "audio_input_id_per_second": 25.0,
172
- "audio_kernel_size": 3,
173
- "audio_n_mels": 128,
174
- "audio_nfft": 960,
175
- "audio_sampling_rate": 24000,
176
- "audio_segment_size": 6000,
177
- "audio_start_token_id": 151673,
178
- "audio_stride_size": 2,
179
- "audio_token_id": 151669,
180
- "audio_window_size": 960,
181
- "audio_zeroemb_idx": [
182
- 1024,
183
- 1024,
184
- 1024,
185
- 1024,
186
- 1024,
187
- 1024,
188
- 1024,
189
- 1024,
190
- 1024,
191
- 1024,
192
- 1024,
193
- 1024,
194
- 1024,
195
- 1024,
196
- 1024,
197
- 1024,
198
- 1024,
199
- 1024,
200
- 1024,
201
- 1024
202
- ],
203
- "fps": 1.0,
204
- "image_max_pixels": 8388608,
205
- "image_min_pixels": 8192,
206
- "image_token_id": 151655,
207
- "max_frames": 1024,
208
- "merge_size": 2,
209
- "min_frames": null,
210
- "num_frames": null,
211
- "pad_token_id": 151643,
212
- "patch_size": 16,
213
- "rope_type": "rope",
214
- "temporal_compression_ratio": 1,
215
- "temporal_patch_size": 2,
216
- "use_per_grid_t_timestamps": false,
217
- "use_video_timestamps": true,
218
- "video_audio_interleave_length": 0.0,
219
- "video_end_token_id": 151671,
220
- "video_max_pixels": 8388608,
221
- "video_min_pixels": 8192,
222
- "video_process_num_threads": 16,
223
- "video_start_token_id": 151670,
224
- "video_token_id": 151656,
225
- "video_tokens_per_second": 2,
226
- "video_total_max_pixels": 67108864,
227
- "vision_end_token_id": 151653,
228
- "vision_start_token_id": 151652
229
- },
230
- "quantization_config": {
231
- "activation_scheme": "dynamic",
232
- "fmt": "e4m3",
233
- "quant_method": "fp8",
234
- "store_dtype": "fp8",
235
- "ignored_layers": [
236
- "model.layers.0.self_attn.o_proj",
237
- "model.layers.1.self_attn.o_proj",
238
- "model.layers.2.self_attn.o_proj",
239
- "model.layers.3.self_attn.o_proj",
240
- "model.layers.4.self_attn.o_proj",
241
- "model.layers.5.self_attn.o_proj",
242
- "model.layers.6.self_attn.o_proj",
243
- "model.layers.7.self_attn.o_proj",
244
- "model.layers.8.self_attn.o_proj",
245
- "model.layers.9.self_attn.o_proj",
246
- "model.layers.10.self_attn.o_proj",
247
- "model.layers.11.self_attn.o_proj",
248
- "model.layers.12.self_attn.o_proj",
249
- "model.layers.13.self_attn.o_proj",
250
- "model.layers.14.self_attn.o_proj",
251
- "model.layers.15.self_attn.o_proj",
252
- "model.layers.16.self_attn.o_proj",
253
- "model.layers.17.self_attn.o_proj",
254
- "model.layers.18.self_attn.o_proj",
255
- "model.layers.19.self_attn.o_proj",
256
- "model.layers.20.self_attn.o_proj",
257
- "model.layers.21.self_attn.o_proj",
258
- "model.layers.22.self_attn.o_proj",
259
- "model.layers.23.self_attn.o_proj",
260
- "model.layers.24.self_attn.o_proj",
261
- "model.layers.25.self_attn.o_proj",
262
- "model.layers.26.self_attn.o_proj",
263
- "model.layers.27.self_attn.o_proj",
264
- "model.layers.28.self_attn.o_proj",
265
- "model.layers.29.self_attn.o_proj",
266
- "model.layers.30.self_attn.o_proj",
267
- "model.layers.31.self_attn.o_proj",
268
- "model.layers.32.self_attn.o_proj",
269
- "model.layers.33.self_attn.o_proj",
270
- "model.layers.34.self_attn.o_proj",
271
- "model.layers.35.self_attn.o_proj",
272
- "model.layers.36.self_attn.o_proj",
273
- "model.layers.37.self_attn.o_proj",
274
- "model.layers.38.self_attn.o_proj",
275
- "model.layers.39.self_attn.o_proj",
276
- "model.layers.40.self_attn.o_proj",
277
- "model.layers.41.self_attn.o_proj",
278
- "model.layers.42.self_attn.o_proj",
279
- "model.layers.43.self_attn.o_proj",
280
- "model.layers.44.self_attn.o_proj",
281
- "model.layers.45.self_attn.o_proj",
282
- "model.layers.46.self_attn.o_proj",
283
- "model.layers.47.self_attn.o_proj",
284
- "model.decoder.self_attn.o_proj"
285
- ],
286
- "weight_block_size": [
287
- 128,
288
- 128
289
- ]
290
- },
291
- "rope_scaling": {
292
- "rope_type": "default",
293
- "type": "default"
294
- },
295
- "rope_theta": 5000000,
296
- "routed_scaling_factor": null,
297
- "scoring_func": "sigmoid",
298
- "sliding_window": 128,
299
- "sliding_window_size": 128,
300
- "swa_rope_theta": 10000,
301
- "tie_word_embeddings": false,
302
- "topk_group": 1,
303
- "topk_method": "noaux_tc",
304
- "transformers_version": "4.57.1",
305
- "use_cache": true,
306
- "v_head_dim": 128,
307
- "video_token_id": 151656,
308
- "vision_config": {
309
- "depth": 28,
310
- "fullatt_block_indexes": [
311
- 0,
312
- 9,
313
- 18,
314
- 27
315
- ],
316
- "hidden_act": "silu",
317
- "hidden_size": 1280,
318
- "in_chans": 3,
319
- "intermediate_size": 4608,
320
- "num_heads": 32,
321
- "num_key_value_heads": 8,
322
- "num_query_groups": 4,
323
- "out_hidden_size": 4096,
324
- "patch_size": 16,
325
- "spatial_merge_size": 2,
326
- "spatial_patch_size": 16,
327
- "temporal_patch_size": 2,
328
- "tokens_per_second": 2,
329
- "use_sink": true,
330
- "visual_token_window_size": 64,
331
- "vit_window_attn_types": [
332
- -1,
333
- 0,
334
- 0,
335
- 0,
336
- 0,
337
- 1,
338
- 1,
339
- 1,
340
- 1,
341
- -1,
342
- 0,
343
- 0,
344
- 0,
345
- 0,
346
- 1,
347
- 1,
348
- 1,
349
- 1,
350
- -1,
351
- 0,
352
- 0,
353
- 0,
354
- 0,
355
- 1,
356
- 1,
357
- 1,
358
- 1,
359
- -1
360
- ],
361
- "window_size": 128
362
- },
363
- "vision_end_token_id": 151653,
364
- "vision_model_type": "mimovl",
365
- "vision_start_token_id": 151652,
366
- "vocab_size": 152576
367
- }
 
1
+ {
2
+ "architectures": [
3
+ "MiMoV2ForCausalLM"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_mimo_v2.MiMoV2Config",
7
+ "AutoModel": "modeling_mimo_v2.MiMoV2Model",
8
+ "AutoModelForCausalLM": "modeling_mimo_v2.MiMoV2ForCausalLM"
9
+ },
10
+ "attention_bias": false,
11
+ "attention_chunk_size": 128,
12
+ "attention_dropout": 0.0,
13
+ "attention_value_scale": 0.707,
14
+ "attention_projection_layout": "fused_qkv",
15
+ "add_full_attention_sink_bias": false,
16
+ "add_swa_attention_sink_bias": true,
17
+ "audio_config": {
18
+ "add_post_norm": true,
19
+ "audio_channels": 20,
20
+ "audio_segment_size": 6000,
21
+ "group_size": 4,
22
+ "input_full_attention": true,
23
+ "input_local_attn_heads": 16,
24
+ "input_local_dim": 1024,
25
+ "input_local_head_dim": 64,
26
+ "input_local_hidden_dropout": 0.0,
27
+ "input_local_intermediate_size": 4096,
28
+ "input_local_layers": 6,
29
+ "out_hidden_size": 4096,
30
+ "partial_rotary_factor": 1.0,
31
+ "projection_layers": 2,
32
+ "rope_theta": 640000,
33
+ "speech_vocab_size": "1280",
34
+ "speech_zeroemb_idx": "1024"
35
+ },
36
+ "swa_num_key_value_heads": 8,
37
+ "swa_num_attention_heads": 64,
38
+ "swa_head_dim": 192,
39
+ "swa_v_head_dim": 128,
40
+ "dtype": "bfloat16",
41
+ "eos_token_id": 151645,
42
+ "head_dim": 192,
43
+ "hidden_act": "silu",
44
+ "hidden_size": 4096,
45
+ "hybrid_block_size": null,
46
+ "hybrid_layer_pattern": [
47
+ 0,
48
+ 1,
49
+ 1,
50
+ 1,
51
+ 1,
52
+ 0,
53
+ 1,
54
+ 1,
55
+ 1,
56
+ 1,
57
+ 1,
58
+ 0,
59
+ 1,
60
+ 1,
61
+ 1,
62
+ 1,
63
+ 1,
64
+ 0,
65
+ 1,
66
+ 1,
67
+ 1,
68
+ 1,
69
+ 1,
70
+ 0,
71
+ 1,
72
+ 1,
73
+ 1,
74
+ 1,
75
+ 1,
76
+ 0,
77
+ 1,
78
+ 1,
79
+ 1,
80
+ 1,
81
+ 1,
82
+ 0,
83
+ 1,
84
+ 1,
85
+ 1,
86
+ 1,
87
+ 1,
88
+ 0,
89
+ 1,
90
+ 1,
91
+ 1,
92
+ 1,
93
+ 1,
94
+ 0
95
+ ],
96
+ "image_token_id": 151655,
97
+ "initializer_range": 0.02,
98
+ "intermediate_size": 16384,
99
+ "layernorm_epsilon": 1e-05,
100
+ "max_position_embeddings": 262144,
101
+ "model_type": "mimo_v2",
102
+ "moe_intermediate_size": 2048,
103
+ "moe_layer_freq": [
104
+ 0,
105
+ 1,
106
+ 1,
107
+ 1,
108
+ 1,
109
+ 1,
110
+ 1,
111
+ 1,
112
+ 1,
113
+ 1,
114
+ 1,
115
+ 1,
116
+ 1,
117
+ 1,
118
+ 1,
119
+ 1,
120
+ 1,
121
+ 1,
122
+ 1,
123
+ 1,
124
+ 1,
125
+ 1,
126
+ 1,
127
+ 1,
128
+ 1,
129
+ 1,
130
+ 1,
131
+ 1,
132
+ 1,
133
+ 1,
134
+ 1,
135
+ 1,
136
+ 1,
137
+ 1,
138
+ 1,
139
+ 1,
140
+ 1,
141
+ 1,
142
+ 1,
143
+ 1,
144
+ 1,
145
+ 1,
146
+ 1,
147
+ 1,
148
+ 1,
149
+ 1,
150
+ 1,
151
+ 1
152
+ ],
153
+ "n_group": 1,
154
+ "n_routed_experts": 256,
155
+ "n_shared_experts": null,
156
+ "norm_topk_prob": true,
157
+ "num_attention_heads": 64,
158
+ "num_experts_per_tok": 8,
159
+ "num_hidden_layers": 48,
160
+ "num_key_value_heads": 4,
161
+ "pad_token_id": 151643,
162
+ "partial_rotary_factor": 0.334,
163
+ "processor_config": {
164
+ "audio_avg_pooler": 2,
165
+ "audio_channels": 20,
166
+ "audio_end_token_id": 151674,
167
+ "audio_fmax": null,
168
+ "audio_fmin": 0,
169
+ "audio_group_size": 4,
170
+ "audio_hop_length": 240,
171
+ "audio_input_id_per_second": 25.0,
172
+ "audio_kernel_size": 3,
173
+ "audio_n_mels": 128,
174
+ "audio_nfft": 960,
175
+ "audio_sampling_rate": 24000,
176
+ "audio_segment_size": 6000,
177
+ "audio_start_token_id": 151673,
178
+ "audio_stride_size": 2,
179
+ "audio_token_id": 151669,
180
+ "audio_window_size": 960,
181
+ "audio_zeroemb_idx": [
182
+ 1024,
183
+ 1024,
184
+ 1024,
185
+ 1024,
186
+ 1024,
187
+ 1024,
188
+ 1024,
189
+ 1024,
190
+ 1024,
191
+ 1024,
192
+ 1024,
193
+ 1024,
194
+ 1024,
195
+ 1024,
196
+ 1024,
197
+ 1024,
198
+ 1024,
199
+ 1024,
200
+ 1024,
201
+ 1024
202
+ ],
203
+ "fps": 1.0,
204
+ "image_max_pixels": 8388608,
205
+ "image_min_pixels": 8192,
206
+ "image_token_id": 151655,
207
+ "max_frames": 1024,
208
+ "merge_size": 2,
209
+ "min_frames": null,
210
+ "num_frames": null,
211
+ "pad_token_id": 151643,
212
+ "patch_size": 16,
213
+ "rope_type": "rope",
214
+ "temporal_compression_ratio": 1,
215
+ "temporal_patch_size": 2,
216
+ "use_per_grid_t_timestamps": false,
217
+ "use_video_timestamps": true,
218
+ "video_audio_interleave_length": 0.0,
219
+ "video_end_token_id": 151671,
220
+ "video_max_pixels": 8388608,
221
+ "video_min_pixels": 8192,
222
+ "video_process_num_threads": 16,
223
+ "video_start_token_id": 151670,
224
+ "video_token_id": 151656,
225
+ "video_tokens_per_second": 2,
226
+ "video_total_max_pixels": 134217728,
227
+ "vision_end_token_id": 151653,
228
+ "vision_start_token_id": 151652
229
+ },
230
+ "quantization_config": {
231
+ "activation_scheme": "dynamic",
232
+ "fmt": "e4m3",
233
+ "quant_method": "fp8",
234
+ "store_dtype": "fp8",
235
+ "ignored_layers": [
236
+ "model.layers.0.self_attn.o_proj",
237
+ "model.layers.1.self_attn.o_proj",
238
+ "model.layers.2.self_attn.o_proj",
239
+ "model.layers.3.self_attn.o_proj",
240
+ "model.layers.4.self_attn.o_proj",
241
+ "model.layers.5.self_attn.o_proj",
242
+ "model.layers.6.self_attn.o_proj",
243
+ "model.layers.7.self_attn.o_proj",
244
+ "model.layers.8.self_attn.o_proj",
245
+ "model.layers.9.self_attn.o_proj",
246
+ "model.layers.10.self_attn.o_proj",
247
+ "model.layers.11.self_attn.o_proj",
248
+ "model.layers.12.self_attn.o_proj",
249
+ "model.layers.13.self_attn.o_proj",
250
+ "model.layers.14.self_attn.o_proj",
251
+ "model.layers.15.self_attn.o_proj",
252
+ "model.layers.16.self_attn.o_proj",
253
+ "model.layers.17.self_attn.o_proj",
254
+ "model.layers.18.self_attn.o_proj",
255
+ "model.layers.19.self_attn.o_proj",
256
+ "model.layers.20.self_attn.o_proj",
257
+ "model.layers.21.self_attn.o_proj",
258
+ "model.layers.22.self_attn.o_proj",
259
+ "model.layers.23.self_attn.o_proj",
260
+ "model.layers.24.self_attn.o_proj",
261
+ "model.layers.25.self_attn.o_proj",
262
+ "model.layers.26.self_attn.o_proj",
263
+ "model.layers.27.self_attn.o_proj",
264
+ "model.layers.28.self_attn.o_proj",
265
+ "model.layers.29.self_attn.o_proj",
266
+ "model.layers.30.self_attn.o_proj",
267
+ "model.layers.31.self_attn.o_proj",
268
+ "model.layers.32.self_attn.o_proj",
269
+ "model.layers.33.self_attn.o_proj",
270
+ "model.layers.34.self_attn.o_proj",
271
+ "model.layers.35.self_attn.o_proj",
272
+ "model.layers.36.self_attn.o_proj",
273
+ "model.layers.37.self_attn.o_proj",
274
+ "model.layers.38.self_attn.o_proj",
275
+ "model.layers.39.self_attn.o_proj",
276
+ "model.layers.40.self_attn.o_proj",
277
+ "model.layers.41.self_attn.o_proj",
278
+ "model.layers.42.self_attn.o_proj",
279
+ "model.layers.43.self_attn.o_proj",
280
+ "model.layers.44.self_attn.o_proj",
281
+ "model.layers.45.self_attn.o_proj",
282
+ "model.layers.46.self_attn.o_proj",
283
+ "model.layers.47.self_attn.o_proj",
284
+ "model.decoder.self_attn.o_proj"
285
+ ],
286
+ "weight_block_size": [
287
+ 128,
288
+ 128
289
+ ]
290
+ },
291
+ "rope_scaling": {
292
+ "rope_type": "default",
293
+ "type": "default"
294
+ },
295
+ "rope_theta": 5000000,
296
+ "routed_scaling_factor": null,
297
+ "scoring_func": "sigmoid",
298
+ "sliding_window": 128,
299
+ "sliding_window_size": 128,
300
+ "swa_rope_theta": 10000,
301
+ "tie_word_embeddings": false,
302
+ "topk_group": 1,
303
+ "topk_method": "noaux_tc",
304
+ "transformers_version": "4.57.1",
305
+ "use_cache": true,
306
+ "v_head_dim": 128,
307
+ "video_token_id": 151656,
308
+ "vision_config": {
309
+ "depth": 28,
310
+ "fullatt_block_indexes": [
311
+ 0,
312
+ 9,
313
+ 18,
314
+ 27
315
+ ],
316
+ "hidden_act": "silu",
317
+ "hidden_size": 1280,
318
+ "in_chans": 3,
319
+ "intermediate_size": 4608,
320
+ "num_heads": 32,
321
+ "num_key_value_heads": 8,
322
+ "num_query_groups": 4,
323
+ "out_hidden_size": 4096,
324
+ "patch_size": 16,
325
+ "spatial_merge_size": 2,
326
+ "spatial_patch_size": 16,
327
+ "temporal_patch_size": 2,
328
+ "tokens_per_second": 2,
329
+ "use_sink": true,
330
+ "visual_token_window_size": 64,
331
+ "vit_window_attn_types": [
332
+ -1,
333
+ 0,
334
+ 0,
335
+ 0,
336
+ 0,
337
+ 1,
338
+ 1,
339
+ 1,
340
+ 1,
341
+ -1,
342
+ 0,
343
+ 0,
344
+ 0,
345
+ 0,
346
+ 1,
347
+ 1,
348
+ 1,
349
+ 1,
350
+ -1,
351
+ 0,
352
+ 0,
353
+ 0,
354
+ 0,
355
+ 1,
356
+ 1,
357
+ 1,
358
+ 1,
359
+ -1
360
+ ],
361
+ "window_size": 128
362
+ },
363
+ "vision_end_token_id": 151653,
364
+ "vision_model_type": "mimovl",
365
+ "vision_start_token_id": 151652,
366
+ "vocab_size": 152576
367
+ }
tokenizer_config.json CHANGED
@@ -281,7 +281,7 @@
281
  "<|mimo_audio_end|>"
282
  ],
283
  "bos_token": null,
284
- "chat_template": "{%- if not add_generation_prompt is defined -%}\n {%- set add_generation_prompt = false -%}\n{%- endif -%}\n{%- if not enable_thinking is defined -%}\n {%- set enable_thinking = false -%}\n{%- endif -%}\n{%- if not keep_all_reasoning is defined -%}\n {%- set keep_all_reasoning = true -%}\n{%- endif -%}\n{%- macro render_extra_keys(json_dict, handled_keys) -%}\n {%- if json_dict is mapping %}\n {%- for json_key in json_dict if json_key not in handled_keys %}\n {%- if json_dict[json_key] is mapping or (json_dict[json_key] is sequence and json_dict[json_key] is not string) %}\n {{- '\\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | tojson | safe) ~ '</' ~ json_key ~ '>' }}\n {%- else %}\n {{-'\\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | string) ~ '</' ~ json_key ~ '>' }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n{%- endmacro -%}\n{%- macro render_content(message_content) -%}\n {%- if message_content is string -%}\n {{- message_content -}}\n {%- else -%}\n {%- for content in message_content -%}\n {%- if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}\n {{- '<|vision_start|><|image_pad|><|vision_end|>' -}}\n {%- elif content['type'] == 'audio' or 'audio' in content or 'audio_url' in content -%}\n {{- '<|mimo_audio_start|><|audio_pad|><|mimo_audio_end|>' -}}\n {%- elif content['type'] == 'video' or 'video' in content or 'video_url' in content -%}\n {{- '<|vision_start|><|video_pad|><|vision_end|>' -}}\n {%- elif 'text' in content -%}\n {{- content['text'] -}}\n {%- endif -%}\n {%- endfor -%}\n {%- endif -%}\n{%- endmacro -%}\n{%- if messages[0][\"role\"] == \"system\" %}\n {%- set system_message = messages[0][\"content\"] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{%- set ns = namespace(last_user_index=-1) %}\n{%- for m in loop_messages %}\n {%- if m.role == 'user' %}\n {%- set ns.last_user_index = loop.index0 -%}\n {%- endif %}\n{%- endfor %}\n{%- 
if not tools is defined %}\n {%- set tools = [] %}\n{%- endif %}\n{%- if system_message is defined %}\n {{- \"<|im_start|>system\\n\" + system_message }}\n{%- else %}\n {{- \"<|im_start|>system\\nYou are MiMo, a helpful AI assistant engineered by Xiaomi.\" }}\n{%- endif %}\n{%- if tools is iterable and tools | length > 0 %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou have access to the following functions:\\n\\n\" }}\n {{- \"<tools>\" }}\n {%- for tool in tools %}\n {%- if tool.function is defined %}\n {%- set tool = tool.function %}\n {%- endif %}\n {{- \"\\n<function>\\n<name>\" ~ tool.name ~ \"</name>\" }}\n {%- if tool.description is defined %}\n {{- '\\n<description>' ~ (tool.description | trim) ~ '</description>' }}\n {%- endif %}\n {{- '\\n<parameters>' }}\n {%- if tool.parameters is defined and tool.parameters is mapping and tool.parameters.properties is defined and tool.parameters.properties is mapping %}\n {%- for param_name, param_fields in tool.parameters.properties|items %}\n {{- '\\n<parameter>' }}\n {{- '\\n<name>' ~ param_name ~ '</name>' }}\n {%- if param_fields.type is defined %}\n {{- '\\n<type>' ~ (param_fields.type | string) ~ '</type>' }}\n {%- endif %}\n {%- if param_fields.description is defined %}\n {{- '\\n<description>' ~ (param_fields.description | trim) ~ '</description>' }}\n {%- endif %}\n {%- set handled_keys = ['name', 'type', 'description'] %}\n {{- render_extra_keys(param_fields, handled_keys) }}\n {{- '\\n</parameter>' }}\n {%- endfor %}\n {%- endif %}\n {%- set handled_keys = ['type', 'properties'] %}\n {{- render_extra_keys(tool.parameters, handled_keys) }}\n {{- '\\n</parameters>' }}\n {%- set handled_keys = ['type', 'name', 'description', 'parameters'] %}\n {{- render_extra_keys(tool, handled_keys) }}\n {{- '\\n</function>' }}\n {%- endfor %}\n {{- \"\\n</tools>\" }}\n {{- '\\n\\nFor each function call, output the function name and arguments in the following 
format:\\n<tool_call>\\n<function=example_function_name>\\n<parameter=example_parameter_1>value_1</parameter>\\n<parameter=example_parameter_2>This is the value for the second parameter\\nthat can span\\nmultiple lines</parameter>\\n</function>\\n</tool_call>\\n\\n<IMPORTANT>\\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\\n- DO NOT use function calls inside <think></think> tags.\\n- The value enclosed between parameter tags is preserved exactly as-is, including newlines and spaces.\\n</IMPORTANT>' }}\n{%- endif %}\n{{- '<|im_end|>' }}\n{%- for message in loop_messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = render_content(message.content) %}\n {%- endif %}\n {%- if message.role == \"assistant\" %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- set reasoning_content = '' %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = content.split('</think>')[0].split('<think>')[-1] %}\n {%- set content = content.split('</think>')[-1] %}\n {%- endif %}\n {%- endif %}\n {%- if (keep_all_reasoning or loop.index0 > ns.last_user_index) and reasoning_content -%}\n {{- '<|im_start|>' + message.role + '\\n<think>' + reasoning_content + '</think>' + content }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n<think></think>' + content }}\n {%- endif %}\n {%- if message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n<function=' + tool_call.name + '>\\n' }}\n {%- if tool_call.arguments is defined %}\n {%- for args_name, args_value in tool_call.arguments|items %}\n {{- '<parameter=' + args_name + '>' 
}}\n {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}\n {{- args_value }}\n {{- '</parameter>\\n' }}\n {%- endfor %}\n {%- endif %}\n {{- '</function>\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>' }}\n {%- elif message.role == \"user\" %}\n {{- '<|im_start|>' + message.role + '\\n' + render_content(message.content) + '<|im_end|>' }}\n {%- elif message.role == \"system\" %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.previtem and loop.previtem.role != \"tool\" %}\n {{- '<|im_start|>tool\\n' }}\n {%- endif %}\n {{- '<tool_response>\\n' }}\n {{- render_content(message.content) }}\n {{- '\\n</tool_response>\\n' }}\n {%- if not loop.last and loop.nextitem.role != \"tool\" %}\n {{- '<|im_end|>' }}\n {%- elif loop.last %}\n {{- '<|im_end|>' }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if not enable_thinking -%}\n {{- '<think></think>' -}}\n {%- else -%}\n {{- '' -}}\n {%- endif -%}\n{%- endif %}\n",
285
  "clean_up_tokenization_spaces": false,
286
  "eos_token": "<|im_end|>",
287
  "errors": "replace",
 
281
  "<|mimo_audio_end|>"
282
  ],
283
  "bos_token": null,
284
+ "chat_template": "{%- if not add_generation_prompt is defined -%}\n {%- set add_generation_prompt = false -%}\n{%- endif -%}\n{%- if not enable_thinking is defined -%}\n {%- set enable_thinking = true -%}\n{%- endif -%}\n{%- if not keep_all_reasoning is defined -%}\n {%- set keep_all_reasoning = true -%}\n{%- endif -%}\n{%- macro render_extra_keys(json_dict, handled_keys) -%}\n {%- if json_dict is mapping %}\n {%- for json_key in json_dict if json_key not in handled_keys %}\n {%- if json_dict[json_key] is mapping or (json_dict[json_key] is sequence and json_dict[json_key] is not string) %}\n {{- '\\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | tojson | safe) ~ '</' ~ json_key ~ '>' }}\n {%- else %}\n {{-'\\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | string) ~ '</' ~ json_key ~ '>' }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n{%- endmacro -%}\n{%- macro render_content(message_content) -%}\n {%- if message_content is string -%}\n {{- message_content -}}\n {%- else -%}\n {%- for content in message_content -%}\n {%- if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}\n {{- '<|vision_start|><|image_pad|><|vision_end|>' -}}\n {%- elif content['type'] == 'audio' or 'audio' in content or 'audio_url' in content -%}\n {{- '<|mimo_audio_start|><|audio_pad|><|mimo_audio_end|>' -}}\n {%- elif content['type'] == 'video' or 'video' in content or 'video_url' in content -%}\n {{- '<|vision_start|><|video_pad|><|vision_end|>' -}}\n {%- elif 'text' in content -%}\n {{- content['text'] -}}\n {%- endif -%}\n {%- endfor -%}\n {%- endif -%}\n{%- endmacro -%}\n{%- if messages[0][\"role\"] == \"system\" %}\n {%- set system_message = messages[0][\"content\"] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{%- set ns = namespace(last_user_index=-1) %}\n{%- for m in loop_messages %}\n {%- if m.role == 'user' %}\n {%- set ns.last_user_index = loop.index0 -%}\n {%- endif %}\n{%- endfor %}\n{%- 
if not tools is defined %}\n {%- set tools = [] %}\n{%- endif %}\n{%- if system_message is defined %}\n {{- \"<|im_start|>system\\n\" + system_message }}\n{%- else %}\n {{- \"<|im_start|>system\\nYou are MiMo, a helpful AI assistant engineered by Xiaomi.\" }}\n{%- endif %}\n{%- if tools is iterable and tools | length > 0 %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou have access to the following functions:\\n\\n\" }}\n {{- \"<tools>\" }}\n {%- for tool in tools %}\n {%- if tool.function is defined %}\n {%- set tool = tool.function %}\n {%- endif %}\n {{- \"\\n<function>\\n<name>\" ~ tool.name ~ \"</name>\" }}\n {%- if tool.description is defined %}\n {{- '\\n<description>' ~ (tool.description | trim) ~ '</description>' }}\n {%- endif %}\n {{- '\\n<parameters>' }}\n {%- if tool.parameters is defined and tool.parameters is mapping and tool.parameters.properties is defined and tool.parameters.properties is mapping %}\n {%- for param_name, param_fields in tool.parameters.properties|items %}\n {{- '\\n<parameter>' }}\n {{- '\\n<name>' ~ param_name ~ '</name>' }}\n {%- if param_fields.type is defined %}\n {{- '\\n<type>' ~ (param_fields.type | string) ~ '</type>' }}\n {%- endif %}\n {%- if param_fields.description is defined %}\n {{- '\\n<description>' ~ (param_fields.description | trim) ~ '</description>' }}\n {%- endif %}\n {%- set handled_keys = ['name', 'type', 'description'] %}\n {{- render_extra_keys(param_fields, handled_keys) }}\n {{- '\\n</parameter>' }}\n {%- endfor %}\n {%- endif %}\n {%- set handled_keys = ['type', 'properties'] %}\n {{- render_extra_keys(tool.parameters, handled_keys) }}\n {{- '\\n</parameters>' }}\n {%- set handled_keys = ['type', 'name', 'description', 'parameters'] %}\n {{- render_extra_keys(tool, handled_keys) }}\n {{- '\\n</function>' }}\n {%- endfor %}\n {{- \"\\n</tools>\" }}\n {{- '\\n\\nFor each function call, output the function name and arguments in the following 
format:\\n<tool_call>\\n<function=example_function_name>\\n<parameter=example_parameter_1>value_1</parameter>\\n<parameter=example_parameter_2>This is the value for the second parameter\\nthat can span\\nmultiple lines</parameter>\\n</function>\\n</tool_call>\\n\\n<IMPORTANT>\\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\\n- DO NOT use function calls inside <think></think> tags.\\n- The value enclosed between parameter tags is preserved exactly as-is, including newlines and spaces.\\n</IMPORTANT>' }}\n{%- endif %}\n{{- '<|im_end|>' }}\n{%- for message in loop_messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = render_content(message.content) %}\n {%- endif %}\n {%- if message.role == \"assistant\" %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- set reasoning_content = '' %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = content.split('</think>')[0].split('<think>')[-1] %}\n {%- set content = content.split('</think>')[-1] %}\n {%- endif %}\n {%- endif %}\n {%- if (keep_all_reasoning or loop.index0 > ns.last_user_index) and reasoning_content -%}\n {{- '<|im_start|>' + message.role + '\\n<think>' + reasoning_content + '</think>' + content }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n<think></think>' + content }}\n {%- endif %}\n {%- if message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n<function=' + tool_call.name + '>\\n' }}\n {%- if tool_call.arguments is defined %}\n {%- for args_name, args_value in tool_call.arguments|items %}\n {{- '<parameter=' + args_name + '>' 
}}\n {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}\n {{- args_value }}\n {{- '</parameter>\\n' }}\n {%- endfor %}\n {%- endif %}\n {{- '</function>\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>' }}\n {%- elif message.role == \"user\" %}\n {{- '<|im_start|>' + message.role + '\\n' + render_content(message.content) + '<|im_end|>' }}\n {%- elif message.role == \"system\" %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.previtem and loop.previtem.role != \"tool\" %}\n {{- '<|im_start|>tool\\n' }}\n {%- endif %}\n {{- '<tool_response>\\n' }}\n {{- render_content(message.content) }}\n {{- '\\n</tool_response>\\n' }}\n {%- if not loop.last and loop.nextitem.role != \"tool\" %}\n {{- '<|im_end|>' }}\n {%- elif loop.last %}\n {{- '<|im_end|>' }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if not enable_thinking -%}\n {{- '<think></think>' -}}\n {%- else -%}\n {{- '' -}}\n {%- endif -%}\n{%- endif %}\n",
285
  "clean_up_tokenization_spaces": false,
286
  "eos_token": "<|im_end|>",
287
  "errors": "replace",