MiMo-V2.5 / config.json
invinciblejha01's picture
Duplicate from XiaomiMiMo/MiMo-V2.5
1f6acd7
{
"architectures": [
"MiMoV2ForCausalLM"
],
"auto_map": {
"AutoConfig": "configuration_mimo_v2.MiMoV2Config",
"AutoModel": "modeling_mimo_v2.MiMoV2Model",
"AutoModelForCausalLM": "modeling_mimo_v2.MiMoV2ForCausalLM"
},
"attention_bias": false,
"attention_chunk_size": 128,
"attention_dropout": 0.0,
"attention_value_scale": 0.707,
"attention_projection_layout": "fused_qkv",
"add_full_attention_sink_bias": false,
"add_swa_attention_sink_bias": true,
"audio_config": {
"add_post_norm": true,
"audio_channels": 20,
"audio_segment_size": 6000,
"group_size": 4,
"input_full_attention": true,
"input_local_attn_heads": 16,
"input_local_dim": 1024,
"input_local_head_dim": 64,
"input_local_hidden_dropout": 0.0,
"input_local_intermediate_size": 4096,
"input_local_layers": 6,
"out_hidden_size": 4096,
"partial_rotary_factor": 1.0,
"projection_layers": 2,
"rope_theta": 640000,
"speech_vocab_size": "1280",
"speech_zeroemb_idx": "1024"
},
"swa_num_key_value_heads": 8,
"swa_num_attention_heads": 64,
"swa_head_dim": 192,
"swa_v_head_dim": 128,
"dtype": "bfloat16",
"eos_token_id": 151645,
"head_dim": 192,
"hidden_act": "silu",
"hidden_size": 4096,
"hybrid_block_size": null,
"hybrid_layer_pattern": [
0,
1,
1,
1,
1,
0,
1,
1,
1,
1,
1,
0,
1,
1,
1,
1,
1,
0,
1,
1,
1,
1,
1,
0,
1,
1,
1,
1,
1,
0,
1,
1,
1,
1,
1,
0,
1,
1,
1,
1,
1,
0,
1,
1,
1,
1,
1,
0
],
"image_token_id": 151655,
"initializer_range": 0.02,
"intermediate_size": 16384,
"layernorm_epsilon": 1e-05,
"max_position_embeddings": 262144,
"model_type": "mimo_v2",
"moe_intermediate_size": 2048,
"moe_layer_freq": [
0,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1
],
"n_group": 1,
"n_routed_experts": 256,
"n_shared_experts": null,
"norm_topk_prob": true,
"num_attention_heads": 64,
"num_experts_per_tok": 8,
"num_hidden_layers": 48,
"num_key_value_heads": 4,
"pad_token_id": 151643,
"partial_rotary_factor": 0.334,
"processor_config": {
"audio_avg_pooler": 2,
"audio_channels": 20,
"audio_end_token_id": 151674,
"audio_fmax": null,
"audio_fmin": 0,
"audio_group_size": 4,
"audio_hop_length": 240,
"audio_input_id_per_second": 25.0,
"audio_kernel_size": 3,
"audio_n_mels": 128,
"audio_nfft": 960,
"audio_sampling_rate": 24000,
"audio_segment_size": 6000,
"audio_start_token_id": 151673,
"audio_stride_size": 2,
"audio_token_id": 151669,
"audio_window_size": 960,
"audio_zeroemb_idx": [
1024,
1024,
1024,
1024,
1024,
1024,
1024,
1024,
1024,
1024,
1024,
1024,
1024,
1024,
1024,
1024,
1024,
1024,
1024,
1024
],
"fps": 1.0,
"image_max_pixels": 8388608,
"image_min_pixels": 8192,
"image_token_id": 151655,
"max_frames": 1024,
"merge_size": 2,
"min_frames": null,
"num_frames": null,
"pad_token_id": 151643,
"patch_size": 16,
"rope_type": "rope",
"temporal_compression_ratio": 1,
"temporal_patch_size": 2,
"use_per_grid_t_timestamps": false,
"use_video_timestamps": true,
"video_audio_interleave_length": 0.0,
"video_end_token_id": 151671,
"video_max_pixels": 8388608,
"video_min_pixels": 8192,
"video_process_num_threads": 16,
"video_start_token_id": 151670,
"video_token_id": 151656,
"video_tokens_per_second": 2,
"video_total_max_pixels": 67108864,
"vision_end_token_id": 151653,
"vision_start_token_id": 151652
},
"quantization_config": {
"activation_scheme": "dynamic",
"fmt": "e4m3",
"quant_method": "fp8",
"store_dtype": "fp8",
"ignored_layers": [
"model.layers.0.self_attn.o_proj",
"model.layers.1.self_attn.o_proj",
"model.layers.2.self_attn.o_proj",
"model.layers.3.self_attn.o_proj",
"model.layers.4.self_attn.o_proj",
"model.layers.5.self_attn.o_proj",
"model.layers.6.self_attn.o_proj",
"model.layers.7.self_attn.o_proj",
"model.layers.8.self_attn.o_proj",
"model.layers.9.self_attn.o_proj",
"model.layers.10.self_attn.o_proj",
"model.layers.11.self_attn.o_proj",
"model.layers.12.self_attn.o_proj",
"model.layers.13.self_attn.o_proj",
"model.layers.14.self_attn.o_proj",
"model.layers.15.self_attn.o_proj",
"model.layers.16.self_attn.o_proj",
"model.layers.17.self_attn.o_proj",
"model.layers.18.self_attn.o_proj",
"model.layers.19.self_attn.o_proj",
"model.layers.20.self_attn.o_proj",
"model.layers.21.self_attn.o_proj",
"model.layers.22.self_attn.o_proj",
"model.layers.23.self_attn.o_proj",
"model.layers.24.self_attn.o_proj",
"model.layers.25.self_attn.o_proj",
"model.layers.26.self_attn.o_proj",
"model.layers.27.self_attn.o_proj",
"model.layers.28.self_attn.o_proj",
"model.layers.29.self_attn.o_proj",
"model.layers.30.self_attn.o_proj",
"model.layers.31.self_attn.o_proj",
"model.layers.32.self_attn.o_proj",
"model.layers.33.self_attn.o_proj",
"model.layers.34.self_attn.o_proj",
"model.layers.35.self_attn.o_proj",
"model.layers.36.self_attn.o_proj",
"model.layers.37.self_attn.o_proj",
"model.layers.38.self_attn.o_proj",
"model.layers.39.self_attn.o_proj",
"model.layers.40.self_attn.o_proj",
"model.layers.41.self_attn.o_proj",
"model.layers.42.self_attn.o_proj",
"model.layers.43.self_attn.o_proj",
"model.layers.44.self_attn.o_proj",
"model.layers.45.self_attn.o_proj",
"model.layers.46.self_attn.o_proj",
"model.layers.47.self_attn.o_proj",
"model.decoder.self_attn.o_proj"
],
"weight_block_size": [
128,
128
]
},
"rope_scaling": {
"rope_type": "default",
"type": "default"
},
"rope_theta": 5000000,
"routed_scaling_factor": null,
"scoring_func": "sigmoid",
"sliding_window": 128,
"sliding_window_size": 128,
"swa_rope_theta": 10000,
"tie_word_embeddings": false,
"topk_group": 1,
"topk_method": "noaux_tc",
"transformers_version": "4.57.1",
"use_cache": true,
"v_head_dim": 128,
"video_token_id": 151656,
"vision_config": {
"depth": 28,
"fullatt_block_indexes": [
0,
9,
18,
27
],
"hidden_act": "silu",
"hidden_size": 1280,
"in_chans": 3,
"intermediate_size": 4608,
"num_heads": 32,
"num_key_value_heads": 8,
"num_query_groups": 4,
"out_hidden_size": 4096,
"patch_size": 16,
"spatial_merge_size": 2,
"spatial_patch_size": 16,
"temporal_patch_size": 2,
"tokens_per_second": 2,
"use_sink": true,
"visual_token_window_size": 64,
"vit_window_attn_types": [
-1,
0,
0,
0,
0,
1,
1,
1,
1,
-1,
0,
0,
0,
0,
1,
1,
1,
1,
-1,
0,
0,
0,
0,
1,
1,
1,
1,
-1
],
"window_size": 128
},
"vision_end_token_id": 151653,
"vision_model_type": "mimovl",
"vision_start_token_id": 151652,
"vocab_size": 152576
}