juliendenize's picture
Fix Transformers config
b6c512b verified
raw
history blame
90.3 kB
{
"architectures": [
"Mistral3ForConditionalGeneration"
],
"dtype": "bfloat16",
"image_token_index": 10,
"model_type": "mistral3",
"multimodal_projector_bias": false,
"projector_hidden_act": "gelu",
"quantization_config": {
"quant_method": "compressed-tensors",
"format": "mixed-precision",
"config_groups": {
"group_0": {
"format": "float-quantized",
"weights": {
"num_bits": 8,
"type": "float",
"strategy": "tensor",
"symmetric": true,
"dynamic": false,
"observer": "memoryless_minmax"
},
"input_activations": {
"num_bits": 8,
"type": "float",
"strategy": "tensor",
"symmetric": true,
"dynamic": false,
"observer": "memoryless_minmax"
},
"targets": [
"re:^language_model[.]model[.]layers[.]0[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]0[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]0[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]0[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]0[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]1[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]1[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]1[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]1[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]10[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]10[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]10[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]10[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]2[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]2[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]2[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]2[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]2[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]23[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]23[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]23[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]23[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]26[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]26[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]26[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]26[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]28[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]28[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]28[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]28[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]29[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]29[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]29[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]29[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]3[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]3[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]3[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]3[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]30[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]30[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]30[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]30[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]30[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]31[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]31[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]31[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]31[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]32[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]32[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]32[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]32[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]33[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]33[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]33[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]33[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]33[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]34[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]34[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]34[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]34[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]34[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]35[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]35[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]35[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]35[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]36[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]36[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]36[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]36[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]37[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]37[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]37[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]37[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]37[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]38[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]38[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]38[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]38[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]39[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]39[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]39[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]39[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]4[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]4[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]4[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]4[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]40[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]40[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]42[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]5[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]5[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]5[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]5[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]5[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]6[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]6[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]6[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]6[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]76[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]76[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]76[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]76[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]77[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]77[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]77[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]77[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]78[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]78[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]78[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]78[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]79[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]79[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]79[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]79[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]8[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]8[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]8[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]8[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]80[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]80[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]80[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]80[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]81[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]81[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]81[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]81[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]82[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]82[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]82[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]82[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]83[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]83[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]83[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]83[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]84[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]84[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]84[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]84[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]85[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]85[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]85[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]85[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]86[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]9[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]9[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]9[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]9[.]self_attn[.]v_proj$"
]
},
"group_1": {
"format": "nvfp4-pack-quantized",
"weights": {
"num_bits": 4,
"type": "float",
"strategy": "tensor_group",
"group_size": 16,
"symmetric": true,
"dynamic": false,
"scale_dtype": "torch.float8_e4m3fn",
"zp_dtype": "torch.float8_e4m3fn",
"observer": "memoryless_minmax"
},
"input_activations": {
"num_bits": 4,
"type": "float",
"strategy": "tensor_group",
"group_size": 16,
"symmetric": true,
"dynamic": "local",
"observer": "static_minmax",
"scale_dtype": "torch.float8_e4m3fn",
"zp_dtype": "torch.float8_e4m3fn"
},
"targets": [
"re:^language_model[.]model[.]layers[.]0[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]0[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]0[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]0[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]1[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]1[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]1[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]1[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]1[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]10[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]10[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]10[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]10[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]10[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]11[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]11[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]11[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]11[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]11[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]11[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]11[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]11[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]11[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]12[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]12[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]12[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]12[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]12[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]12[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]12[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]12[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]12[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]13[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]13[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]13[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]13[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]13[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]13[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]13[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]13[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]13[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]14[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]14[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]14[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]14[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]14[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]14[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]14[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]14[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]14[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]15[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]15[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]15[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]15[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]15[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]15[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]15[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]15[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]15[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]16[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]16[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]16[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]16[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]16[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]16[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]16[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]16[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]16[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]17[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]17[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]17[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]17[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]17[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]17[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]17[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]17[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]17[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]18[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]18[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]18[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]18[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]18[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]18[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]18[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]18[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]18[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]19[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]19[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]19[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]19[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]19[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]19[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]19[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]19[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]19[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]2[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]2[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]2[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]2[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]20[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]20[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]20[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]20[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]20[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]20[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]20[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]20[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]20[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]21[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]21[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]21[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]21[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]21[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]21[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]21[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]21[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]21[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]22[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]22[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]22[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]22[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]22[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]22[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]22[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]22[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]22[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]23[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]23[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]23[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]23[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]23[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]24[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]24[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]24[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]24[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]24[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]24[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]24[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]24[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]24[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]25[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]25[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]25[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]25[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]25[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]25[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]25[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]25[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]25[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]26[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]26[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]26[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]26[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]26[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]27[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]27[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]27[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]27[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]27[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]27[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]27[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]27[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]27[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]28[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]28[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]28[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]28[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]28[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]29[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]29[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]29[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]29[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]29[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]3[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]3[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]3[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]3[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]3[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]30[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]30[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]30[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]30[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]31[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]31[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]31[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]31[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]31[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]32[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]32[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]32[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]32[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]32[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]33[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]33[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]33[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]33[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]34[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]34[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]34[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]34[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]35[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]35[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]35[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]35[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]35[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]36[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]36[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]36[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]36[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]36[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]37[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]37[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]37[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]37[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]38[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]38[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]38[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]38[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]38[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]39[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]39[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]39[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]39[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]39[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]4[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]4[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]4[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]4[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]4[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]40[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]40[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]40[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]40[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]40[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]40[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]40[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]41[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]41[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]41[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]41[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]41[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]41[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]41[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]41[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]41[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]42[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]42[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]42[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]42[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]42[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]42[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]42[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]42[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]43[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]43[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]43[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]43[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]43[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]43[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]43[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]43[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]43[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]44[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]44[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]44[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]44[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]44[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]44[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]44[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]44[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]44[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]45[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]45[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]45[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]45[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]45[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]45[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]45[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]45[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]45[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]46[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]46[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]46[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]46[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]46[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]46[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]46[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]46[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]46[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]47[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]47[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]47[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]47[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]47[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]47[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]47[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]47[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]47[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]48[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]48[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]48[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]48[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]48[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]48[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]48[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]48[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]48[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]49[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]49[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]49[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]49[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]49[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]49[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]49[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]49[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]49[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]5[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]5[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]5[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]5[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]50[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]50[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]50[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]50[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]50[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]50[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]50[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]50[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]50[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]51[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]51[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]51[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]51[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]51[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]51[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]51[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]51[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]51[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]52[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]52[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]52[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]52[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]52[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]52[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]52[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]52[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]52[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]53[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]53[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]53[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]53[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]53[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]53[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]53[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]53[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]53[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]54[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]54[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]54[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]54[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]54[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]54[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]54[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]54[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]54[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]55[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]55[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]55[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]55[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]55[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]55[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]55[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]55[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]55[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]56[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]56[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]56[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]56[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]56[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]56[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]56[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]56[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]56[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]57[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]57[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]57[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]57[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]57[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]57[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]57[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]57[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]57[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]58[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]58[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]58[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]58[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]58[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]58[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]58[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]58[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]58[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]59[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]59[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]59[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]59[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]59[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]59[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]59[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]59[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]59[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]6[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]6[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]6[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]6[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]6[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]60[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]60[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]60[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]60[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]60[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]60[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]60[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]60[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]60[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]61[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]61[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]61[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]61[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]61[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]61[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]61[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]61[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]61[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]62[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]62[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]62[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]62[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]62[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]62[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]62[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]62[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]62[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]63[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]63[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]63[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]63[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]63[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]63[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]63[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]63[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]63[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]64[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]64[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]64[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]64[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]64[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]64[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]64[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]64[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]64[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]65[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]65[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]65[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]65[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]65[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]65[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]65[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]65[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]65[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]66[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]66[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]66[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]66[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]66[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]66[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]66[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]66[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]66[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]67[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]67[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]67[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]67[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]67[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]67[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]67[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]67[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]67[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]68[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]68[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]68[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]68[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]68[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]68[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]68[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]68[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]68[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]69[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]69[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]69[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]69[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]69[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]69[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]69[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]69[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]69[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]7[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]7[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]7[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]7[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]7[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]7[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]7[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]7[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]7[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]70[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]70[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]70[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]70[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]70[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]70[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]70[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]70[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]70[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]71[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]71[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]71[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]71[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]71[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]71[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]71[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]71[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]71[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]72[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]72[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]72[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]72[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]72[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]72[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]72[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]72[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]72[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]73[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]73[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]73[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]73[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]73[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]73[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]73[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]73[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]73[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]74[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]74[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]74[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]74[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]74[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]74[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]74[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]74[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]74[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]75[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]75[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]75[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]75[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]75[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]75[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]75[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]75[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]75[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]76[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]76[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]76[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]76[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]76[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]77[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]77[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]77[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]77[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]77[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]78[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]78[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]78[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]78[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]78[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]79[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]79[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]79[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]79[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]79[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]8[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]8[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]8[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]8[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]8[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]80[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]80[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]80[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]80[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]80[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]81[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]81[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]81[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]81[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]81[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]82[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]82[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]82[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]82[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]82[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]83[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]83[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]83[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]83[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]83[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]84[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]84[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]84[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]84[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]84[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]85[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]85[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]85[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]85[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]85[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]86[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]86[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]86[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]86[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]86[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]86[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]86[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]86[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]87[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]87[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]87[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]87[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]87[.]self_attn[.]k_proj$",
"re:^language_model[.]model[.]layers[.]87[.]self_attn[.]o_proj$",
"re:^language_model[.]model[.]layers[.]87[.]self_attn[.]q_proj$",
"re:^language_model[.]model[.]layers[.]87[.]self_attn[.]qkv_proj$",
"re:^language_model[.]model[.]layers[.]87[.]self_attn[.]v_proj$",
"re:^language_model[.]model[.]layers[.]9[.]mlp[.]down_proj$",
"re:^language_model[.]model[.]layers[.]9[.]mlp[.]gate_proj$",
"re:^language_model[.]model[.]layers[.]9[.]mlp[.]gate_up_proj$",
"re:^language_model[.]model[.]layers[.]9[.]mlp[.]up_proj$",
"re:^language_model[.]model[.]layers[.]9[.]self_attn[.]o_proj$"
]
}
},
"ignore": [
"language_model.lm_head",
"language_model.model.embed_tokens",
"multi_modal_projector.linear_1",
"multi_modal_projector.linear_2",
"multi_modal_projector.patch_merger.merging_layer",
"vision_tower.transformer.layers.0.attention.k_proj",
"vision_tower.transformer.layers.0.attention.o_proj",
"vision_tower.transformer.layers.0.attention.q_proj",
"vision_tower.transformer.layers.0.attention.qkv_proj",
"vision_tower.transformer.layers.0.attention.v_proj",
"vision_tower.transformer.layers.0.feed_forward.down_proj",
"vision_tower.transformer.layers.0.feed_forward.gate_proj",
"vision_tower.transformer.layers.0.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.0.feed_forward.up_proj",
"vision_tower.transformer.layers.1.attention.k_proj",
"vision_tower.transformer.layers.1.attention.o_proj",
"vision_tower.transformer.layers.1.attention.q_proj",
"vision_tower.transformer.layers.1.attention.qkv_proj",
"vision_tower.transformer.layers.1.attention.v_proj",
"vision_tower.transformer.layers.1.feed_forward.down_proj",
"vision_tower.transformer.layers.1.feed_forward.gate_proj",
"vision_tower.transformer.layers.1.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.1.feed_forward.up_proj",
"vision_tower.transformer.layers.10.attention.k_proj",
"vision_tower.transformer.layers.10.attention.o_proj",
"vision_tower.transformer.layers.10.attention.q_proj",
"vision_tower.transformer.layers.10.attention.qkv_proj",
"vision_tower.transformer.layers.10.attention.v_proj",
"vision_tower.transformer.layers.10.feed_forward.down_proj",
"vision_tower.transformer.layers.10.feed_forward.gate_proj",
"vision_tower.transformer.layers.10.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.10.feed_forward.up_proj",
"vision_tower.transformer.layers.11.attention.k_proj",
"vision_tower.transformer.layers.11.attention.o_proj",
"vision_tower.transformer.layers.11.attention.q_proj",
"vision_tower.transformer.layers.11.attention.qkv_proj",
"vision_tower.transformer.layers.11.attention.v_proj",
"vision_tower.transformer.layers.11.feed_forward.down_proj",
"vision_tower.transformer.layers.11.feed_forward.gate_proj",
"vision_tower.transformer.layers.11.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.11.feed_forward.up_proj",
"vision_tower.transformer.layers.12.attention.k_proj",
"vision_tower.transformer.layers.12.attention.o_proj",
"vision_tower.transformer.layers.12.attention.q_proj",
"vision_tower.transformer.layers.12.attention.qkv_proj",
"vision_tower.transformer.layers.12.attention.v_proj",
"vision_tower.transformer.layers.12.feed_forward.down_proj",
"vision_tower.transformer.layers.12.feed_forward.gate_proj",
"vision_tower.transformer.layers.12.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.12.feed_forward.up_proj",
"vision_tower.transformer.layers.13.attention.k_proj",
"vision_tower.transformer.layers.13.attention.o_proj",
"vision_tower.transformer.layers.13.attention.q_proj",
"vision_tower.transformer.layers.13.attention.qkv_proj",
"vision_tower.transformer.layers.13.attention.v_proj",
"vision_tower.transformer.layers.13.feed_forward.down_proj",
"vision_tower.transformer.layers.13.feed_forward.gate_proj",
"vision_tower.transformer.layers.13.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.13.feed_forward.up_proj",
"vision_tower.transformer.layers.14.attention.k_proj",
"vision_tower.transformer.layers.14.attention.o_proj",
"vision_tower.transformer.layers.14.attention.q_proj",
"vision_tower.transformer.layers.14.attention.qkv_proj",
"vision_tower.transformer.layers.14.attention.v_proj",
"vision_tower.transformer.layers.14.feed_forward.down_proj",
"vision_tower.transformer.layers.14.feed_forward.gate_proj",
"vision_tower.transformer.layers.14.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.14.feed_forward.up_proj",
"vision_tower.transformer.layers.15.attention.k_proj",
"vision_tower.transformer.layers.15.attention.o_proj",
"vision_tower.transformer.layers.15.attention.q_proj",
"vision_tower.transformer.layers.15.attention.qkv_proj",
"vision_tower.transformer.layers.15.attention.v_proj",
"vision_tower.transformer.layers.15.feed_forward.down_proj",
"vision_tower.transformer.layers.15.feed_forward.gate_proj",
"vision_tower.transformer.layers.15.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.15.feed_forward.up_proj",
"vision_tower.transformer.layers.16.attention.k_proj",
"vision_tower.transformer.layers.16.attention.o_proj",
"vision_tower.transformer.layers.16.attention.q_proj",
"vision_tower.transformer.layers.16.attention.qkv_proj",
"vision_tower.transformer.layers.16.attention.v_proj",
"vision_tower.transformer.layers.16.feed_forward.down_proj",
"vision_tower.transformer.layers.16.feed_forward.gate_proj",
"vision_tower.transformer.layers.16.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.16.feed_forward.up_proj",
"vision_tower.transformer.layers.17.attention.k_proj",
"vision_tower.transformer.layers.17.attention.o_proj",
"vision_tower.transformer.layers.17.attention.q_proj",
"vision_tower.transformer.layers.17.attention.qkv_proj",
"vision_tower.transformer.layers.17.attention.v_proj",
"vision_tower.transformer.layers.17.feed_forward.down_proj",
"vision_tower.transformer.layers.17.feed_forward.gate_proj",
"vision_tower.transformer.layers.17.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.17.feed_forward.up_proj",
"vision_tower.transformer.layers.18.attention.k_proj",
"vision_tower.transformer.layers.18.attention.o_proj",
"vision_tower.transformer.layers.18.attention.q_proj",
"vision_tower.transformer.layers.18.attention.qkv_proj",
"vision_tower.transformer.layers.18.attention.v_proj",
"vision_tower.transformer.layers.18.feed_forward.down_proj",
"vision_tower.transformer.layers.18.feed_forward.gate_proj",
"vision_tower.transformer.layers.18.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.18.feed_forward.up_proj",
"vision_tower.transformer.layers.19.attention.k_proj",
"vision_tower.transformer.layers.19.attention.o_proj",
"vision_tower.transformer.layers.19.attention.q_proj",
"vision_tower.transformer.layers.19.attention.qkv_proj",
"vision_tower.transformer.layers.19.attention.v_proj",
"vision_tower.transformer.layers.19.feed_forward.down_proj",
"vision_tower.transformer.layers.19.feed_forward.gate_proj",
"vision_tower.transformer.layers.19.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.19.feed_forward.up_proj",
"vision_tower.transformer.layers.2.attention.k_proj",
"vision_tower.transformer.layers.2.attention.o_proj",
"vision_tower.transformer.layers.2.attention.q_proj",
"vision_tower.transformer.layers.2.attention.qkv_proj",
"vision_tower.transformer.layers.2.attention.v_proj",
"vision_tower.transformer.layers.2.feed_forward.down_proj",
"vision_tower.transformer.layers.2.feed_forward.gate_proj",
"vision_tower.transformer.layers.2.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.2.feed_forward.up_proj",
"vision_tower.transformer.layers.20.attention.k_proj",
"vision_tower.transformer.layers.20.attention.o_proj",
"vision_tower.transformer.layers.20.attention.q_proj",
"vision_tower.transformer.layers.20.attention.qkv_proj",
"vision_tower.transformer.layers.20.attention.v_proj",
"vision_tower.transformer.layers.20.feed_forward.down_proj",
"vision_tower.transformer.layers.20.feed_forward.gate_proj",
"vision_tower.transformer.layers.20.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.20.feed_forward.up_proj",
"vision_tower.transformer.layers.21.attention.k_proj",
"vision_tower.transformer.layers.21.attention.o_proj",
"vision_tower.transformer.layers.21.attention.q_proj",
"vision_tower.transformer.layers.21.attention.qkv_proj",
"vision_tower.transformer.layers.21.attention.v_proj",
"vision_tower.transformer.layers.21.feed_forward.down_proj",
"vision_tower.transformer.layers.21.feed_forward.gate_proj",
"vision_tower.transformer.layers.21.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.21.feed_forward.up_proj",
"vision_tower.transformer.layers.22.attention.k_proj",
"vision_tower.transformer.layers.22.attention.o_proj",
"vision_tower.transformer.layers.22.attention.q_proj",
"vision_tower.transformer.layers.22.attention.qkv_proj",
"vision_tower.transformer.layers.22.attention.v_proj",
"vision_tower.transformer.layers.22.feed_forward.down_proj",
"vision_tower.transformer.layers.22.feed_forward.gate_proj",
"vision_tower.transformer.layers.22.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.22.feed_forward.up_proj",
"vision_tower.transformer.layers.23.attention.k_proj",
"vision_tower.transformer.layers.23.attention.o_proj",
"vision_tower.transformer.layers.23.attention.q_proj",
"vision_tower.transformer.layers.23.attention.qkv_proj",
"vision_tower.transformer.layers.23.attention.v_proj",
"vision_tower.transformer.layers.23.feed_forward.down_proj",
"vision_tower.transformer.layers.23.feed_forward.gate_proj",
"vision_tower.transformer.layers.23.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.23.feed_forward.up_proj",
"vision_tower.transformer.layers.24.attention.k_proj",
"vision_tower.transformer.layers.24.attention.o_proj",
"vision_tower.transformer.layers.24.attention.q_proj",
"vision_tower.transformer.layers.24.attention.qkv_proj",
"vision_tower.transformer.layers.24.attention.v_proj",
"vision_tower.transformer.layers.24.feed_forward.down_proj",
"vision_tower.transformer.layers.24.feed_forward.gate_proj",
"vision_tower.transformer.layers.24.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.24.feed_forward.up_proj",
"vision_tower.transformer.layers.25.attention.k_proj",
"vision_tower.transformer.layers.25.attention.o_proj",
"vision_tower.transformer.layers.25.attention.q_proj",
"vision_tower.transformer.layers.25.attention.qkv_proj",
"vision_tower.transformer.layers.25.attention.v_proj",
"vision_tower.transformer.layers.25.feed_forward.down_proj",
"vision_tower.transformer.layers.25.feed_forward.gate_proj",
"vision_tower.transformer.layers.25.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.25.feed_forward.up_proj",
"vision_tower.transformer.layers.26.attention.k_proj",
"vision_tower.transformer.layers.26.attention.o_proj",
"vision_tower.transformer.layers.26.attention.q_proj",
"vision_tower.transformer.layers.26.attention.qkv_proj",
"vision_tower.transformer.layers.26.attention.v_proj",
"vision_tower.transformer.layers.26.feed_forward.down_proj",
"vision_tower.transformer.layers.26.feed_forward.gate_proj",
"vision_tower.transformer.layers.26.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.26.feed_forward.up_proj",
"vision_tower.transformer.layers.27.attention.k_proj",
"vision_tower.transformer.layers.27.attention.o_proj",
"vision_tower.transformer.layers.27.attention.q_proj",
"vision_tower.transformer.layers.27.attention.qkv_proj",
"vision_tower.transformer.layers.27.attention.v_proj",
"vision_tower.transformer.layers.27.feed_forward.down_proj",
"vision_tower.transformer.layers.27.feed_forward.gate_proj",
"vision_tower.transformer.layers.27.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.27.feed_forward.up_proj",
"vision_tower.transformer.layers.28.attention.k_proj",
"vision_tower.transformer.layers.28.attention.o_proj",
"vision_tower.transformer.layers.28.attention.q_proj",
"vision_tower.transformer.layers.28.attention.qkv_proj",
"vision_tower.transformer.layers.28.attention.v_proj",
"vision_tower.transformer.layers.28.feed_forward.down_proj",
"vision_tower.transformer.layers.28.feed_forward.gate_proj",
"vision_tower.transformer.layers.28.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.28.feed_forward.up_proj",
"vision_tower.transformer.layers.29.attention.k_proj",
"vision_tower.transformer.layers.29.attention.o_proj",
"vision_tower.transformer.layers.29.attention.q_proj",
"vision_tower.transformer.layers.29.attention.qkv_proj",
"vision_tower.transformer.layers.29.attention.v_proj",
"vision_tower.transformer.layers.29.feed_forward.down_proj",
"vision_tower.transformer.layers.29.feed_forward.gate_proj",
"vision_tower.transformer.layers.29.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.29.feed_forward.up_proj",
"vision_tower.transformer.layers.3.attention.k_proj",
"vision_tower.transformer.layers.3.attention.o_proj",
"vision_tower.transformer.layers.3.attention.q_proj",
"vision_tower.transformer.layers.3.attention.qkv_proj",
"vision_tower.transformer.layers.3.attention.v_proj",
"vision_tower.transformer.layers.3.feed_forward.down_proj",
"vision_tower.transformer.layers.3.feed_forward.gate_proj",
"vision_tower.transformer.layers.3.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.3.feed_forward.up_proj",
"vision_tower.transformer.layers.30.attention.k_proj",
"vision_tower.transformer.layers.30.attention.o_proj",
"vision_tower.transformer.layers.30.attention.q_proj",
"vision_tower.transformer.layers.30.attention.qkv_proj",
"vision_tower.transformer.layers.30.attention.v_proj",
"vision_tower.transformer.layers.30.feed_forward.down_proj",
"vision_tower.transformer.layers.30.feed_forward.gate_proj",
"vision_tower.transformer.layers.30.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.30.feed_forward.up_proj",
"vision_tower.transformer.layers.31.attention.k_proj",
"vision_tower.transformer.layers.31.attention.o_proj",
"vision_tower.transformer.layers.31.attention.q_proj",
"vision_tower.transformer.layers.31.attention.qkv_proj",
"vision_tower.transformer.layers.31.attention.v_proj",
"vision_tower.transformer.layers.31.feed_forward.down_proj",
"vision_tower.transformer.layers.31.feed_forward.gate_proj",
"vision_tower.transformer.layers.31.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.31.feed_forward.up_proj",
"vision_tower.transformer.layers.32.attention.k_proj",
"vision_tower.transformer.layers.32.attention.o_proj",
"vision_tower.transformer.layers.32.attention.q_proj",
"vision_tower.transformer.layers.32.attention.qkv_proj",
"vision_tower.transformer.layers.32.attention.v_proj",
"vision_tower.transformer.layers.32.feed_forward.down_proj",
"vision_tower.transformer.layers.32.feed_forward.gate_proj",
"vision_tower.transformer.layers.32.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.32.feed_forward.up_proj",
"vision_tower.transformer.layers.33.attention.k_proj",
"vision_tower.transformer.layers.33.attention.o_proj",
"vision_tower.transformer.layers.33.attention.q_proj",
"vision_tower.transformer.layers.33.attention.qkv_proj",
"vision_tower.transformer.layers.33.attention.v_proj",
"vision_tower.transformer.layers.33.feed_forward.down_proj",
"vision_tower.transformer.layers.33.feed_forward.gate_proj",
"vision_tower.transformer.layers.33.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.33.feed_forward.up_proj",
"vision_tower.transformer.layers.34.attention.k_proj",
"vision_tower.transformer.layers.34.attention.o_proj",
"vision_tower.transformer.layers.34.attention.q_proj",
"vision_tower.transformer.layers.34.attention.qkv_proj",
"vision_tower.transformer.layers.34.attention.v_proj",
"vision_tower.transformer.layers.34.feed_forward.down_proj",
"vision_tower.transformer.layers.34.feed_forward.gate_proj",
"vision_tower.transformer.layers.34.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.34.feed_forward.up_proj",
"vision_tower.transformer.layers.35.attention.k_proj",
"vision_tower.transformer.layers.35.attention.o_proj",
"vision_tower.transformer.layers.35.attention.q_proj",
"vision_tower.transformer.layers.35.attention.qkv_proj",
"vision_tower.transformer.layers.35.attention.v_proj",
"vision_tower.transformer.layers.35.feed_forward.down_proj",
"vision_tower.transformer.layers.35.feed_forward.gate_proj",
"vision_tower.transformer.layers.35.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.35.feed_forward.up_proj",
"vision_tower.transformer.layers.36.attention.k_proj",
"vision_tower.transformer.layers.36.attention.o_proj",
"vision_tower.transformer.layers.36.attention.q_proj",
"vision_tower.transformer.layers.36.attention.qkv_proj",
"vision_tower.transformer.layers.36.attention.v_proj",
"vision_tower.transformer.layers.36.feed_forward.down_proj",
"vision_tower.transformer.layers.36.feed_forward.gate_proj",
"vision_tower.transformer.layers.36.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.36.feed_forward.up_proj",
"vision_tower.transformer.layers.37.attention.k_proj",
"vision_tower.transformer.layers.37.attention.o_proj",
"vision_tower.transformer.layers.37.attention.q_proj",
"vision_tower.transformer.layers.37.attention.qkv_proj",
"vision_tower.transformer.layers.37.attention.v_proj",
"vision_tower.transformer.layers.37.feed_forward.down_proj",
"vision_tower.transformer.layers.37.feed_forward.gate_proj",
"vision_tower.transformer.layers.37.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.37.feed_forward.up_proj",
"vision_tower.transformer.layers.38.attention.k_proj",
"vision_tower.transformer.layers.38.attention.o_proj",
"vision_tower.transformer.layers.38.attention.q_proj",
"vision_tower.transformer.layers.38.attention.qkv_proj",
"vision_tower.transformer.layers.38.attention.v_proj",
"vision_tower.transformer.layers.38.feed_forward.down_proj",
"vision_tower.transformer.layers.38.feed_forward.gate_proj",
"vision_tower.transformer.layers.38.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.38.feed_forward.up_proj",
"vision_tower.transformer.layers.39.attention.k_proj",
"vision_tower.transformer.layers.39.attention.o_proj",
"vision_tower.transformer.layers.39.attention.q_proj",
"vision_tower.transformer.layers.39.attention.qkv_proj",
"vision_tower.transformer.layers.39.attention.v_proj",
"vision_tower.transformer.layers.39.feed_forward.down_proj",
"vision_tower.transformer.layers.39.feed_forward.gate_proj",
"vision_tower.transformer.layers.39.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.39.feed_forward.up_proj",
"vision_tower.transformer.layers.4.attention.k_proj",
"vision_tower.transformer.layers.4.attention.o_proj",
"vision_tower.transformer.layers.4.attention.q_proj",
"vision_tower.transformer.layers.4.attention.qkv_proj",
"vision_tower.transformer.layers.4.attention.v_proj",
"vision_tower.transformer.layers.4.feed_forward.down_proj",
"vision_tower.transformer.layers.4.feed_forward.gate_proj",
"vision_tower.transformer.layers.4.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.4.feed_forward.up_proj",
"vision_tower.transformer.layers.40.attention.k_proj",
"vision_tower.transformer.layers.40.attention.o_proj",
"vision_tower.transformer.layers.40.attention.q_proj",
"vision_tower.transformer.layers.40.attention.qkv_proj",
"vision_tower.transformer.layers.40.attention.v_proj",
"vision_tower.transformer.layers.40.feed_forward.down_proj",
"vision_tower.transformer.layers.40.feed_forward.gate_proj",
"vision_tower.transformer.layers.40.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.40.feed_forward.up_proj",
"vision_tower.transformer.layers.41.attention.k_proj",
"vision_tower.transformer.layers.41.attention.o_proj",
"vision_tower.transformer.layers.41.attention.q_proj",
"vision_tower.transformer.layers.41.attention.qkv_proj",
"vision_tower.transformer.layers.41.attention.v_proj",
"vision_tower.transformer.layers.41.feed_forward.down_proj",
"vision_tower.transformer.layers.41.feed_forward.gate_proj",
"vision_tower.transformer.layers.41.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.41.feed_forward.up_proj",
"vision_tower.transformer.layers.42.attention.k_proj",
"vision_tower.transformer.layers.42.attention.o_proj",
"vision_tower.transformer.layers.42.attention.q_proj",
"vision_tower.transformer.layers.42.attention.qkv_proj",
"vision_tower.transformer.layers.42.attention.v_proj",
"vision_tower.transformer.layers.42.feed_forward.down_proj",
"vision_tower.transformer.layers.42.feed_forward.gate_proj",
"vision_tower.transformer.layers.42.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.42.feed_forward.up_proj",
"vision_tower.transformer.layers.43.attention.k_proj",
"vision_tower.transformer.layers.43.attention.o_proj",
"vision_tower.transformer.layers.43.attention.q_proj",
"vision_tower.transformer.layers.43.attention.qkv_proj",
"vision_tower.transformer.layers.43.attention.v_proj",
"vision_tower.transformer.layers.43.feed_forward.down_proj",
"vision_tower.transformer.layers.43.feed_forward.gate_proj",
"vision_tower.transformer.layers.43.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.43.feed_forward.up_proj",
"vision_tower.transformer.layers.44.attention.k_proj",
"vision_tower.transformer.layers.44.attention.o_proj",
"vision_tower.transformer.layers.44.attention.q_proj",
"vision_tower.transformer.layers.44.attention.qkv_proj",
"vision_tower.transformer.layers.44.attention.v_proj",
"vision_tower.transformer.layers.44.feed_forward.down_proj",
"vision_tower.transformer.layers.44.feed_forward.gate_proj",
"vision_tower.transformer.layers.44.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.44.feed_forward.up_proj",
"vision_tower.transformer.layers.45.attention.k_proj",
"vision_tower.transformer.layers.45.attention.o_proj",
"vision_tower.transformer.layers.45.attention.q_proj",
"vision_tower.transformer.layers.45.attention.qkv_proj",
"vision_tower.transformer.layers.45.attention.v_proj",
"vision_tower.transformer.layers.45.feed_forward.down_proj",
"vision_tower.transformer.layers.45.feed_forward.gate_proj",
"vision_tower.transformer.layers.45.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.45.feed_forward.up_proj",
"vision_tower.transformer.layers.46.attention.k_proj",
"vision_tower.transformer.layers.46.attention.o_proj",
"vision_tower.transformer.layers.46.attention.q_proj",
"vision_tower.transformer.layers.46.attention.qkv_proj",
"vision_tower.transformer.layers.46.attention.v_proj",
"vision_tower.transformer.layers.46.feed_forward.down_proj",
"vision_tower.transformer.layers.46.feed_forward.gate_proj",
"vision_tower.transformer.layers.46.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.46.feed_forward.up_proj",
"vision_tower.transformer.layers.47.attention.k_proj",
"vision_tower.transformer.layers.47.attention.o_proj",
"vision_tower.transformer.layers.47.attention.q_proj",
"vision_tower.transformer.layers.47.attention.qkv_proj",
"vision_tower.transformer.layers.47.attention.v_proj",
"vision_tower.transformer.layers.47.feed_forward.down_proj",
"vision_tower.transformer.layers.47.feed_forward.gate_proj",
"vision_tower.transformer.layers.47.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.47.feed_forward.up_proj",
"vision_tower.transformer.layers.5.attention.k_proj",
"vision_tower.transformer.layers.5.attention.o_proj",
"vision_tower.transformer.layers.5.attention.q_proj",
"vision_tower.transformer.layers.5.attention.qkv_proj",
"vision_tower.transformer.layers.5.attention.v_proj",
"vision_tower.transformer.layers.5.feed_forward.down_proj",
"vision_tower.transformer.layers.5.feed_forward.gate_proj",
"vision_tower.transformer.layers.5.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.5.feed_forward.up_proj",
"vision_tower.transformer.layers.6.attention.k_proj",
"vision_tower.transformer.layers.6.attention.o_proj",
"vision_tower.transformer.layers.6.attention.q_proj",
"vision_tower.transformer.layers.6.attention.qkv_proj",
"vision_tower.transformer.layers.6.attention.v_proj",
"vision_tower.transformer.layers.6.feed_forward.down_proj",
"vision_tower.transformer.layers.6.feed_forward.gate_proj",
"vision_tower.transformer.layers.6.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.6.feed_forward.up_proj",
"vision_tower.transformer.layers.7.attention.k_proj",
"vision_tower.transformer.layers.7.attention.o_proj",
"vision_tower.transformer.layers.7.attention.q_proj",
"vision_tower.transformer.layers.7.attention.qkv_proj",
"vision_tower.transformer.layers.7.attention.v_proj",
"vision_tower.transformer.layers.7.feed_forward.down_proj",
"vision_tower.transformer.layers.7.feed_forward.gate_proj",
"vision_tower.transformer.layers.7.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.7.feed_forward.up_proj",
"vision_tower.transformer.layers.8.attention.k_proj",
"vision_tower.transformer.layers.8.attention.o_proj",
"vision_tower.transformer.layers.8.attention.q_proj",
"vision_tower.transformer.layers.8.attention.qkv_proj",
"vision_tower.transformer.layers.8.attention.v_proj",
"vision_tower.transformer.layers.8.feed_forward.down_proj",
"vision_tower.transformer.layers.8.feed_forward.gate_proj",
"vision_tower.transformer.layers.8.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.8.feed_forward.up_proj",
"vision_tower.transformer.layers.9.attention.k_proj",
"vision_tower.transformer.layers.9.attention.o_proj",
"vision_tower.transformer.layers.9.attention.q_proj",
"vision_tower.transformer.layers.9.attention.qkv_proj",
"vision_tower.transformer.layers.9.attention.v_proj",
"vision_tower.transformer.layers.9.feed_forward.down_proj",
"vision_tower.transformer.layers.9.feed_forward.gate_proj",
"vision_tower.transformer.layers.9.feed_forward.gate_up_proj",
"vision_tower.transformer.layers.9.feed_forward.up_proj"
],
"quantization_status": "compressed"
},
"spatial_merge_size": 2,
"text_config": {
"attention_dropout": 0.0,
"bos_token_id": 1,
"eos_token_id": 2,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 12288,
"initializer_range": 0.02,
"intermediate_size": 28672,
"max_position_embeddings": 262144,
"model_type": "ministral3",
"num_attention_heads": 96,
"num_hidden_layers": 88,
"num_key_value_heads": 8,
"pad_token_id": 11,
"rms_norm_eps": 1e-05,
"rope_parameters": {
"beta_fast": 4.0,
"beta_slow": 1.0,
"factor": 64.0,
"llama_4_scaling_beta": 0,
"mscale": 1.0,
"mscale_all_dim": 0.0,
"original_max_position_embeddings": 4096,
"rope_theta": 1000000.0,
"rope_type": "yarn",
"type": "yarn"
},
"sliding_window": null,
"tie_word_embeddings": false,
"use_cache": true,
"vocab_size": 131072
},
"tie_word_embeddings": false,
"transformers_version": "5.6.0.dev0",
"vision_config": {
"attention_dropout": 0.0,
"head_dim": 104,
"hidden_act": "silu",
"hidden_size": 1664,
"image_size": 1540,
"initializer_range": 0.02,
"intermediate_size": 8192,
"model_type": "pixtral",
"num_attention_heads": 16,
"num_channels": 3,
"num_hidden_layers": 48,
"patch_size": 14,
"rope_parameters": {
"rope_theta": 10000.0,
"rope_type": "default"
}
},
"vision_feature_layer": -1
}