| { |
| "architectures": [ |
| "Mistral3ForConditionalGeneration" |
| ], |
| "dtype": "bfloat16", |
| "image_token_index": 10, |
| "model_type": "mistral3", |
| "multimodal_projector_bias": false, |
| "projector_hidden_act": "gelu", |
| "quantization_config": { |
| "quant_method": "compressed-tensors", |
| "format": "mixed-precision", |
| "config_groups": { |
| "group_0": { |
| "format": "float-quantized", |
| "weights": { |
| "num_bits": 8, |
| "type": "float", |
| "strategy": "tensor", |
| "symmetric": true, |
| "dynamic": false, |
| "observer": "memoryless_minmax" |
| }, |
| "input_activations": { |
| "num_bits": 8, |
| "type": "float", |
| "strategy": "tensor", |
| "symmetric": true, |
| "dynamic": false, |
| "observer": "memoryless_minmax" |
| }, |
| "targets": [ |
| "re:^language_model[.]model[.]layers[.]0[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]0[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]0[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]0[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]0[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]1[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]1[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]1[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]1[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]10[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]10[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]10[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]10[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]2[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]2[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]2[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]2[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]2[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]23[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]23[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]23[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]23[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]26[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]26[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]26[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]26[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]28[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]28[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]28[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]28[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]29[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]29[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]29[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]29[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]3[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]3[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]3[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]3[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]30[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]30[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]30[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]30[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]30[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]31[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]31[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]31[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]31[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]32[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]32[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]32[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]32[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]33[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]33[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]33[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]33[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]33[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]34[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]34[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]34[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]34[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]34[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]35[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]35[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]35[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]35[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]36[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]36[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]36[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]36[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]37[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]37[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]37[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]37[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]37[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]38[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]38[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]38[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]38[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]39[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]39[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]39[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]39[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]4[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]4[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]4[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]4[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]40[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]40[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]42[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]5[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]5[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]5[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]5[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]5[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]6[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]6[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]6[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]6[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]76[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]76[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]76[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]76[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]77[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]77[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]77[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]77[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]78[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]78[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]78[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]78[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]79[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]79[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]79[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]79[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]8[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]8[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]8[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]8[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]80[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]80[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]80[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]80[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]81[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]81[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]81[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]81[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]82[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]82[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]82[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]82[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]83[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]83[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]83[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]83[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]84[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]84[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]84[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]84[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]85[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]85[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]85[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]85[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]86[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]9[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]9[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]9[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]9[.]self_attn[.]v_proj$" |
| ] |
| }, |
| "group_1": { |
| "format": "nvfp4-pack-quantized", |
| "weights": { |
| "num_bits": 4, |
| "type": "float", |
| "strategy": "tensor_group", |
| "group_size": 16, |
| "symmetric": true, |
| "dynamic": false, |
| "scale_dtype": "torch.float8_e4m3fn", |
| "zp_dtype": "torch.float8_e4m3fn", |
| "observer": "memoryless_minmax" |
| }, |
| "input_activations": { |
| "num_bits": 4, |
| "type": "float", |
| "strategy": "tensor_group", |
| "group_size": 16, |
| "symmetric": true, |
| "dynamic": "local", |
| "observer": "static_minmax", |
| "scale_dtype": "torch.float8_e4m3fn", |
| "zp_dtype": "torch.float8_e4m3fn" |
| }, |
| "targets": [ |
| "re:^language_model[.]model[.]layers[.]0[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]0[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]0[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]0[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]1[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]1[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]1[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]1[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]1[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]10[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]10[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]10[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]10[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]10[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]11[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]11[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]11[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]11[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]11[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]11[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]11[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]11[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]11[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]12[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]12[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]12[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]12[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]12[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]12[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]12[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]12[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]12[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]13[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]13[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]13[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]13[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]13[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]13[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]13[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]13[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]13[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]14[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]14[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]14[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]14[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]14[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]14[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]14[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]14[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]14[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]15[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]15[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]15[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]15[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]15[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]15[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]15[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]15[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]15[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]16[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]16[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]16[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]16[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]16[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]16[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]16[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]16[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]16[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]17[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]17[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]17[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]17[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]17[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]17[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]17[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]17[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]17[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]18[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]18[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]18[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]18[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]18[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]18[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]18[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]18[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]18[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]19[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]19[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]19[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]19[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]19[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]19[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]19[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]19[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]19[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]2[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]2[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]2[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]2[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]20[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]20[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]20[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]20[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]20[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]20[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]20[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]20[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]20[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]21[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]21[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]21[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]21[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]21[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]21[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]21[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]21[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]21[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]22[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]22[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]22[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]22[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]22[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]22[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]22[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]22[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]22[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]23[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]23[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]23[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]23[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]23[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]24[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]24[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]24[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]24[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]24[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]24[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]24[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]24[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]24[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]25[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]25[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]25[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]25[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]25[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]25[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]25[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]25[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]25[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]26[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]26[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]26[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]26[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]26[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]27[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]27[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]27[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]27[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]27[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]27[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]27[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]27[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]27[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]28[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]28[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]28[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]28[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]28[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]29[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]29[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]29[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]29[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]29[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]3[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]3[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]3[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]3[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]3[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]30[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]30[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]30[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]30[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]31[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]31[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]31[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]31[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]31[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]32[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]32[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]32[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]32[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]32[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]33[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]33[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]33[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]33[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]34[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]34[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]34[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]34[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]35[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]35[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]35[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]35[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]35[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]36[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]36[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]36[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]36[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]36[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]37[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]37[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]37[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]37[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]38[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]38[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]38[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]38[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]38[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]39[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]39[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]39[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]39[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]39[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]4[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]4[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]4[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]4[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]4[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]40[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]40[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]40[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]40[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]40[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]40[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]40[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]41[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]41[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]41[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]41[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]41[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]41[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]41[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]41[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]41[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]42[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]42[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]42[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]42[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]42[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]42[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]42[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]42[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]43[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]43[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]43[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]43[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]43[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]43[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]43[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]43[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]43[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]44[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]44[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]44[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]44[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]44[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]44[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]44[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]44[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]44[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]45[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]45[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]45[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]45[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]45[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]45[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]45[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]45[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]45[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]46[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]46[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]46[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]46[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]46[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]46[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]46[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]46[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]46[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]47[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]47[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]47[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]47[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]47[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]47[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]47[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]47[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]47[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]48[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]48[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]48[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]48[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]48[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]48[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]48[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]48[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]48[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]49[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]49[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]49[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]49[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]49[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]49[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]49[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]49[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]49[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]5[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]5[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]5[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]5[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]50[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]50[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]50[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]50[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]50[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]50[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]50[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]50[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]50[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]51[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]51[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]51[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]51[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]51[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]51[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]51[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]51[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]51[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]52[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]52[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]52[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]52[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]52[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]52[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]52[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]52[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]52[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]53[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]53[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]53[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]53[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]53[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]53[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]53[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]53[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]53[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]54[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]54[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]54[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]54[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]54[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]54[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]54[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]54[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]54[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]55[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]55[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]55[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]55[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]55[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]55[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]55[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]55[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]55[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]56[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]56[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]56[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]56[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]56[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]56[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]56[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]56[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]56[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]57[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]57[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]57[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]57[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]57[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]57[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]57[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]57[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]57[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]58[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]58[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]58[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]58[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]58[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]58[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]58[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]58[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]58[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]59[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]59[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]59[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]59[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]59[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]59[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]59[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]59[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]59[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]6[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]6[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]6[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]6[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]6[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]60[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]60[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]60[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]60[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]60[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]60[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]60[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]60[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]60[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]61[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]61[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]61[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]61[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]61[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]61[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]61[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]61[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]61[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]62[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]62[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]62[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]62[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]62[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]62[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]62[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]62[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]62[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]63[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]63[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]63[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]63[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]63[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]63[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]63[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]63[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]63[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]64[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]64[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]64[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]64[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]64[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]64[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]64[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]64[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]64[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]65[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]65[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]65[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]65[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]65[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]65[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]65[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]65[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]65[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]66[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]66[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]66[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]66[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]66[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]66[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]66[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]66[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]66[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]67[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]67[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]67[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]67[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]67[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]67[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]67[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]67[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]67[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]68[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]68[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]68[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]68[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]68[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]68[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]68[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]68[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]68[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]69[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]69[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]69[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]69[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]69[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]69[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]69[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]69[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]69[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]7[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]7[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]7[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]7[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]7[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]7[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]7[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]7[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]7[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]70[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]70[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]70[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]70[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]70[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]70[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]70[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]70[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]70[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]71[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]71[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]71[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]71[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]71[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]71[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]71[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]71[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]71[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]72[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]72[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]72[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]72[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]72[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]72[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]72[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]72[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]72[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]73[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]73[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]73[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]73[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]73[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]73[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]73[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]73[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]73[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]74[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]74[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]74[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]74[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]74[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]74[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]74[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]74[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]74[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]75[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]75[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]75[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]75[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]75[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]75[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]75[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]75[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]75[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]76[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]76[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]76[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]76[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]76[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]77[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]77[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]77[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]77[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]77[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]78[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]78[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]78[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]78[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]78[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]79[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]79[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]79[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]79[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]79[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]8[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]8[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]8[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]8[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]8[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]80[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]80[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]80[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]80[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]80[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]81[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]81[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]81[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]81[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]81[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]82[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]82[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]82[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]82[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]82[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]83[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]83[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]83[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]83[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]83[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]84[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]84[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]84[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]84[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]84[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]85[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]85[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]85[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]85[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]85[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]86[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]86[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]86[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]86[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]86[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]86[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]86[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]86[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]87[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]87[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]87[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]87[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]87[.]self_attn[.]k_proj$", |
| "re:^language_model[.]model[.]layers[.]87[.]self_attn[.]o_proj$", |
| "re:^language_model[.]model[.]layers[.]87[.]self_attn[.]q_proj$", |
| "re:^language_model[.]model[.]layers[.]87[.]self_attn[.]qkv_proj$", |
| "re:^language_model[.]model[.]layers[.]87[.]self_attn[.]v_proj$", |
| "re:^language_model[.]model[.]layers[.]9[.]mlp[.]down_proj$", |
| "re:^language_model[.]model[.]layers[.]9[.]mlp[.]gate_proj$", |
| "re:^language_model[.]model[.]layers[.]9[.]mlp[.]gate_up_proj$", |
| "re:^language_model[.]model[.]layers[.]9[.]mlp[.]up_proj$", |
| "re:^language_model[.]model[.]layers[.]9[.]self_attn[.]o_proj$" |
| ] |
| } |
| }, |
| "ignore": [ |
| "language_model.lm_head", |
| "language_model.model.embed_tokens", |
| "multi_modal_projector.linear_1", |
| "multi_modal_projector.linear_2", |
| "multi_modal_projector.patch_merger.merging_layer", |
| "vision_tower.transformer.layers.0.attention.k_proj", |
| "vision_tower.transformer.layers.0.attention.o_proj", |
| "vision_tower.transformer.layers.0.attention.q_proj", |
| "vision_tower.transformer.layers.0.attention.qkv_proj", |
| "vision_tower.transformer.layers.0.attention.v_proj", |
| "vision_tower.transformer.layers.0.feed_forward.down_proj", |
| "vision_tower.transformer.layers.0.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.0.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.0.feed_forward.up_proj", |
| "vision_tower.transformer.layers.1.attention.k_proj", |
| "vision_tower.transformer.layers.1.attention.o_proj", |
| "vision_tower.transformer.layers.1.attention.q_proj", |
| "vision_tower.transformer.layers.1.attention.qkv_proj", |
| "vision_tower.transformer.layers.1.attention.v_proj", |
| "vision_tower.transformer.layers.1.feed_forward.down_proj", |
| "vision_tower.transformer.layers.1.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.1.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.1.feed_forward.up_proj", |
| "vision_tower.transformer.layers.10.attention.k_proj", |
| "vision_tower.transformer.layers.10.attention.o_proj", |
| "vision_tower.transformer.layers.10.attention.q_proj", |
| "vision_tower.transformer.layers.10.attention.qkv_proj", |
| "vision_tower.transformer.layers.10.attention.v_proj", |
| "vision_tower.transformer.layers.10.feed_forward.down_proj", |
| "vision_tower.transformer.layers.10.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.10.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.10.feed_forward.up_proj", |
| "vision_tower.transformer.layers.11.attention.k_proj", |
| "vision_tower.transformer.layers.11.attention.o_proj", |
| "vision_tower.transformer.layers.11.attention.q_proj", |
| "vision_tower.transformer.layers.11.attention.qkv_proj", |
| "vision_tower.transformer.layers.11.attention.v_proj", |
| "vision_tower.transformer.layers.11.feed_forward.down_proj", |
| "vision_tower.transformer.layers.11.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.11.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.11.feed_forward.up_proj", |
| "vision_tower.transformer.layers.12.attention.k_proj", |
| "vision_tower.transformer.layers.12.attention.o_proj", |
| "vision_tower.transformer.layers.12.attention.q_proj", |
| "vision_tower.transformer.layers.12.attention.qkv_proj", |
| "vision_tower.transformer.layers.12.attention.v_proj", |
| "vision_tower.transformer.layers.12.feed_forward.down_proj", |
| "vision_tower.transformer.layers.12.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.12.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.12.feed_forward.up_proj", |
| "vision_tower.transformer.layers.13.attention.k_proj", |
| "vision_tower.transformer.layers.13.attention.o_proj", |
| "vision_tower.transformer.layers.13.attention.q_proj", |
| "vision_tower.transformer.layers.13.attention.qkv_proj", |
| "vision_tower.transformer.layers.13.attention.v_proj", |
| "vision_tower.transformer.layers.13.feed_forward.down_proj", |
| "vision_tower.transformer.layers.13.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.13.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.13.feed_forward.up_proj", |
| "vision_tower.transformer.layers.14.attention.k_proj", |
| "vision_tower.transformer.layers.14.attention.o_proj", |
| "vision_tower.transformer.layers.14.attention.q_proj", |
| "vision_tower.transformer.layers.14.attention.qkv_proj", |
| "vision_tower.transformer.layers.14.attention.v_proj", |
| "vision_tower.transformer.layers.14.feed_forward.down_proj", |
| "vision_tower.transformer.layers.14.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.14.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.14.feed_forward.up_proj", |
| "vision_tower.transformer.layers.15.attention.k_proj", |
| "vision_tower.transformer.layers.15.attention.o_proj", |
| "vision_tower.transformer.layers.15.attention.q_proj", |
| "vision_tower.transformer.layers.15.attention.qkv_proj", |
| "vision_tower.transformer.layers.15.attention.v_proj", |
| "vision_tower.transformer.layers.15.feed_forward.down_proj", |
| "vision_tower.transformer.layers.15.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.15.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.15.feed_forward.up_proj", |
| "vision_tower.transformer.layers.16.attention.k_proj", |
| "vision_tower.transformer.layers.16.attention.o_proj", |
| "vision_tower.transformer.layers.16.attention.q_proj", |
| "vision_tower.transformer.layers.16.attention.qkv_proj", |
| "vision_tower.transformer.layers.16.attention.v_proj", |
| "vision_tower.transformer.layers.16.feed_forward.down_proj", |
| "vision_tower.transformer.layers.16.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.16.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.16.feed_forward.up_proj", |
| "vision_tower.transformer.layers.17.attention.k_proj", |
| "vision_tower.transformer.layers.17.attention.o_proj", |
| "vision_tower.transformer.layers.17.attention.q_proj", |
| "vision_tower.transformer.layers.17.attention.qkv_proj", |
| "vision_tower.transformer.layers.17.attention.v_proj", |
| "vision_tower.transformer.layers.17.feed_forward.down_proj", |
| "vision_tower.transformer.layers.17.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.17.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.17.feed_forward.up_proj", |
| "vision_tower.transformer.layers.18.attention.k_proj", |
| "vision_tower.transformer.layers.18.attention.o_proj", |
| "vision_tower.transformer.layers.18.attention.q_proj", |
| "vision_tower.transformer.layers.18.attention.qkv_proj", |
| "vision_tower.transformer.layers.18.attention.v_proj", |
| "vision_tower.transformer.layers.18.feed_forward.down_proj", |
| "vision_tower.transformer.layers.18.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.18.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.18.feed_forward.up_proj", |
| "vision_tower.transformer.layers.19.attention.k_proj", |
| "vision_tower.transformer.layers.19.attention.o_proj", |
| "vision_tower.transformer.layers.19.attention.q_proj", |
| "vision_tower.transformer.layers.19.attention.qkv_proj", |
| "vision_tower.transformer.layers.19.attention.v_proj", |
| "vision_tower.transformer.layers.19.feed_forward.down_proj", |
| "vision_tower.transformer.layers.19.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.19.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.19.feed_forward.up_proj", |
| "vision_tower.transformer.layers.2.attention.k_proj", |
| "vision_tower.transformer.layers.2.attention.o_proj", |
| "vision_tower.transformer.layers.2.attention.q_proj", |
| "vision_tower.transformer.layers.2.attention.qkv_proj", |
| "vision_tower.transformer.layers.2.attention.v_proj", |
| "vision_tower.transformer.layers.2.feed_forward.down_proj", |
| "vision_tower.transformer.layers.2.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.2.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.2.feed_forward.up_proj", |
| "vision_tower.transformer.layers.20.attention.k_proj", |
| "vision_tower.transformer.layers.20.attention.o_proj", |
| "vision_tower.transformer.layers.20.attention.q_proj", |
| "vision_tower.transformer.layers.20.attention.qkv_proj", |
| "vision_tower.transformer.layers.20.attention.v_proj", |
| "vision_tower.transformer.layers.20.feed_forward.down_proj", |
| "vision_tower.transformer.layers.20.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.20.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.20.feed_forward.up_proj", |
| "vision_tower.transformer.layers.21.attention.k_proj", |
| "vision_tower.transformer.layers.21.attention.o_proj", |
| "vision_tower.transformer.layers.21.attention.q_proj", |
| "vision_tower.transformer.layers.21.attention.qkv_proj", |
| "vision_tower.transformer.layers.21.attention.v_proj", |
| "vision_tower.transformer.layers.21.feed_forward.down_proj", |
| "vision_tower.transformer.layers.21.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.21.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.21.feed_forward.up_proj", |
| "vision_tower.transformer.layers.22.attention.k_proj", |
| "vision_tower.transformer.layers.22.attention.o_proj", |
| "vision_tower.transformer.layers.22.attention.q_proj", |
| "vision_tower.transformer.layers.22.attention.qkv_proj", |
| "vision_tower.transformer.layers.22.attention.v_proj", |
| "vision_tower.transformer.layers.22.feed_forward.down_proj", |
| "vision_tower.transformer.layers.22.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.22.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.22.feed_forward.up_proj", |
| "vision_tower.transformer.layers.23.attention.k_proj", |
| "vision_tower.transformer.layers.23.attention.o_proj", |
| "vision_tower.transformer.layers.23.attention.q_proj", |
| "vision_tower.transformer.layers.23.attention.qkv_proj", |
| "vision_tower.transformer.layers.23.attention.v_proj", |
| "vision_tower.transformer.layers.23.feed_forward.down_proj", |
| "vision_tower.transformer.layers.23.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.23.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.23.feed_forward.up_proj", |
| "vision_tower.transformer.layers.24.attention.k_proj", |
| "vision_tower.transformer.layers.24.attention.o_proj", |
| "vision_tower.transformer.layers.24.attention.q_proj", |
| "vision_tower.transformer.layers.24.attention.qkv_proj", |
| "vision_tower.transformer.layers.24.attention.v_proj", |
| "vision_tower.transformer.layers.24.feed_forward.down_proj", |
| "vision_tower.transformer.layers.24.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.24.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.24.feed_forward.up_proj", |
| "vision_tower.transformer.layers.25.attention.k_proj", |
| "vision_tower.transformer.layers.25.attention.o_proj", |
| "vision_tower.transformer.layers.25.attention.q_proj", |
| "vision_tower.transformer.layers.25.attention.qkv_proj", |
| "vision_tower.transformer.layers.25.attention.v_proj", |
| "vision_tower.transformer.layers.25.feed_forward.down_proj", |
| "vision_tower.transformer.layers.25.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.25.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.25.feed_forward.up_proj", |
| "vision_tower.transformer.layers.26.attention.k_proj", |
| "vision_tower.transformer.layers.26.attention.o_proj", |
| "vision_tower.transformer.layers.26.attention.q_proj", |
| "vision_tower.transformer.layers.26.attention.qkv_proj", |
| "vision_tower.transformer.layers.26.attention.v_proj", |
| "vision_tower.transformer.layers.26.feed_forward.down_proj", |
| "vision_tower.transformer.layers.26.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.26.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.26.feed_forward.up_proj", |
| "vision_tower.transformer.layers.27.attention.k_proj", |
| "vision_tower.transformer.layers.27.attention.o_proj", |
| "vision_tower.transformer.layers.27.attention.q_proj", |
| "vision_tower.transformer.layers.27.attention.qkv_proj", |
| "vision_tower.transformer.layers.27.attention.v_proj", |
| "vision_tower.transformer.layers.27.feed_forward.down_proj", |
| "vision_tower.transformer.layers.27.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.27.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.27.feed_forward.up_proj", |
| "vision_tower.transformer.layers.28.attention.k_proj", |
| "vision_tower.transformer.layers.28.attention.o_proj", |
| "vision_tower.transformer.layers.28.attention.q_proj", |
| "vision_tower.transformer.layers.28.attention.qkv_proj", |
| "vision_tower.transformer.layers.28.attention.v_proj", |
| "vision_tower.transformer.layers.28.feed_forward.down_proj", |
| "vision_tower.transformer.layers.28.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.28.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.28.feed_forward.up_proj", |
| "vision_tower.transformer.layers.29.attention.k_proj", |
| "vision_tower.transformer.layers.29.attention.o_proj", |
| "vision_tower.transformer.layers.29.attention.q_proj", |
| "vision_tower.transformer.layers.29.attention.qkv_proj", |
| "vision_tower.transformer.layers.29.attention.v_proj", |
| "vision_tower.transformer.layers.29.feed_forward.down_proj", |
| "vision_tower.transformer.layers.29.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.29.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.29.feed_forward.up_proj", |
| "vision_tower.transformer.layers.3.attention.k_proj", |
| "vision_tower.transformer.layers.3.attention.o_proj", |
| "vision_tower.transformer.layers.3.attention.q_proj", |
| "vision_tower.transformer.layers.3.attention.qkv_proj", |
| "vision_tower.transformer.layers.3.attention.v_proj", |
| "vision_tower.transformer.layers.3.feed_forward.down_proj", |
| "vision_tower.transformer.layers.3.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.3.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.3.feed_forward.up_proj", |
| "vision_tower.transformer.layers.30.attention.k_proj", |
| "vision_tower.transformer.layers.30.attention.o_proj", |
| "vision_tower.transformer.layers.30.attention.q_proj", |
| "vision_tower.transformer.layers.30.attention.qkv_proj", |
| "vision_tower.transformer.layers.30.attention.v_proj", |
| "vision_tower.transformer.layers.30.feed_forward.down_proj", |
| "vision_tower.transformer.layers.30.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.30.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.30.feed_forward.up_proj", |
| "vision_tower.transformer.layers.31.attention.k_proj", |
| "vision_tower.transformer.layers.31.attention.o_proj", |
| "vision_tower.transformer.layers.31.attention.q_proj", |
| "vision_tower.transformer.layers.31.attention.qkv_proj", |
| "vision_tower.transformer.layers.31.attention.v_proj", |
| "vision_tower.transformer.layers.31.feed_forward.down_proj", |
| "vision_tower.transformer.layers.31.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.31.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.31.feed_forward.up_proj", |
| "vision_tower.transformer.layers.32.attention.k_proj", |
| "vision_tower.transformer.layers.32.attention.o_proj", |
| "vision_tower.transformer.layers.32.attention.q_proj", |
| "vision_tower.transformer.layers.32.attention.qkv_proj", |
| "vision_tower.transformer.layers.32.attention.v_proj", |
| "vision_tower.transformer.layers.32.feed_forward.down_proj", |
| "vision_tower.transformer.layers.32.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.32.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.32.feed_forward.up_proj", |
| "vision_tower.transformer.layers.33.attention.k_proj", |
| "vision_tower.transformer.layers.33.attention.o_proj", |
| "vision_tower.transformer.layers.33.attention.q_proj", |
| "vision_tower.transformer.layers.33.attention.qkv_proj", |
| "vision_tower.transformer.layers.33.attention.v_proj", |
| "vision_tower.transformer.layers.33.feed_forward.down_proj", |
| "vision_tower.transformer.layers.33.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.33.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.33.feed_forward.up_proj", |
| "vision_tower.transformer.layers.34.attention.k_proj", |
| "vision_tower.transformer.layers.34.attention.o_proj", |
| "vision_tower.transformer.layers.34.attention.q_proj", |
| "vision_tower.transformer.layers.34.attention.qkv_proj", |
| "vision_tower.transformer.layers.34.attention.v_proj", |
| "vision_tower.transformer.layers.34.feed_forward.down_proj", |
| "vision_tower.transformer.layers.34.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.34.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.34.feed_forward.up_proj", |
| "vision_tower.transformer.layers.35.attention.k_proj", |
| "vision_tower.transformer.layers.35.attention.o_proj", |
| "vision_tower.transformer.layers.35.attention.q_proj", |
| "vision_tower.transformer.layers.35.attention.qkv_proj", |
| "vision_tower.transformer.layers.35.attention.v_proj", |
| "vision_tower.transformer.layers.35.feed_forward.down_proj", |
| "vision_tower.transformer.layers.35.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.35.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.35.feed_forward.up_proj", |
| "vision_tower.transformer.layers.36.attention.k_proj", |
| "vision_tower.transformer.layers.36.attention.o_proj", |
| "vision_tower.transformer.layers.36.attention.q_proj", |
| "vision_tower.transformer.layers.36.attention.qkv_proj", |
| "vision_tower.transformer.layers.36.attention.v_proj", |
| "vision_tower.transformer.layers.36.feed_forward.down_proj", |
| "vision_tower.transformer.layers.36.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.36.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.36.feed_forward.up_proj", |
| "vision_tower.transformer.layers.37.attention.k_proj", |
| "vision_tower.transformer.layers.37.attention.o_proj", |
| "vision_tower.transformer.layers.37.attention.q_proj", |
| "vision_tower.transformer.layers.37.attention.qkv_proj", |
| "vision_tower.transformer.layers.37.attention.v_proj", |
| "vision_tower.transformer.layers.37.feed_forward.down_proj", |
| "vision_tower.transformer.layers.37.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.37.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.37.feed_forward.up_proj", |
| "vision_tower.transformer.layers.38.attention.k_proj", |
| "vision_tower.transformer.layers.38.attention.o_proj", |
| "vision_tower.transformer.layers.38.attention.q_proj", |
| "vision_tower.transformer.layers.38.attention.qkv_proj", |
| "vision_tower.transformer.layers.38.attention.v_proj", |
| "vision_tower.transformer.layers.38.feed_forward.down_proj", |
| "vision_tower.transformer.layers.38.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.38.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.38.feed_forward.up_proj", |
| "vision_tower.transformer.layers.39.attention.k_proj", |
| "vision_tower.transformer.layers.39.attention.o_proj", |
| "vision_tower.transformer.layers.39.attention.q_proj", |
| "vision_tower.transformer.layers.39.attention.qkv_proj", |
| "vision_tower.transformer.layers.39.attention.v_proj", |
| "vision_tower.transformer.layers.39.feed_forward.down_proj", |
| "vision_tower.transformer.layers.39.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.39.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.39.feed_forward.up_proj", |
| "vision_tower.transformer.layers.4.attention.k_proj", |
| "vision_tower.transformer.layers.4.attention.o_proj", |
| "vision_tower.transformer.layers.4.attention.q_proj", |
| "vision_tower.transformer.layers.4.attention.qkv_proj", |
| "vision_tower.transformer.layers.4.attention.v_proj", |
| "vision_tower.transformer.layers.4.feed_forward.down_proj", |
| "vision_tower.transformer.layers.4.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.4.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.4.feed_forward.up_proj", |
| "vision_tower.transformer.layers.40.attention.k_proj", |
| "vision_tower.transformer.layers.40.attention.o_proj", |
| "vision_tower.transformer.layers.40.attention.q_proj", |
| "vision_tower.transformer.layers.40.attention.qkv_proj", |
| "vision_tower.transformer.layers.40.attention.v_proj", |
| "vision_tower.transformer.layers.40.feed_forward.down_proj", |
| "vision_tower.transformer.layers.40.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.40.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.40.feed_forward.up_proj", |
| "vision_tower.transformer.layers.41.attention.k_proj", |
| "vision_tower.transformer.layers.41.attention.o_proj", |
| "vision_tower.transformer.layers.41.attention.q_proj", |
| "vision_tower.transformer.layers.41.attention.qkv_proj", |
| "vision_tower.transformer.layers.41.attention.v_proj", |
| "vision_tower.transformer.layers.41.feed_forward.down_proj", |
| "vision_tower.transformer.layers.41.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.41.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.41.feed_forward.up_proj", |
| "vision_tower.transformer.layers.42.attention.k_proj", |
| "vision_tower.transformer.layers.42.attention.o_proj", |
| "vision_tower.transformer.layers.42.attention.q_proj", |
| "vision_tower.transformer.layers.42.attention.qkv_proj", |
| "vision_tower.transformer.layers.42.attention.v_proj", |
| "vision_tower.transformer.layers.42.feed_forward.down_proj", |
| "vision_tower.transformer.layers.42.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.42.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.42.feed_forward.up_proj", |
| "vision_tower.transformer.layers.43.attention.k_proj", |
| "vision_tower.transformer.layers.43.attention.o_proj", |
| "vision_tower.transformer.layers.43.attention.q_proj", |
| "vision_tower.transformer.layers.43.attention.qkv_proj", |
| "vision_tower.transformer.layers.43.attention.v_proj", |
| "vision_tower.transformer.layers.43.feed_forward.down_proj", |
| "vision_tower.transformer.layers.43.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.43.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.43.feed_forward.up_proj", |
| "vision_tower.transformer.layers.44.attention.k_proj", |
| "vision_tower.transformer.layers.44.attention.o_proj", |
| "vision_tower.transformer.layers.44.attention.q_proj", |
| "vision_tower.transformer.layers.44.attention.qkv_proj", |
| "vision_tower.transformer.layers.44.attention.v_proj", |
| "vision_tower.transformer.layers.44.feed_forward.down_proj", |
| "vision_tower.transformer.layers.44.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.44.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.44.feed_forward.up_proj", |
| "vision_tower.transformer.layers.45.attention.k_proj", |
| "vision_tower.transformer.layers.45.attention.o_proj", |
| "vision_tower.transformer.layers.45.attention.q_proj", |
| "vision_tower.transformer.layers.45.attention.qkv_proj", |
| "vision_tower.transformer.layers.45.attention.v_proj", |
| "vision_tower.transformer.layers.45.feed_forward.down_proj", |
| "vision_tower.transformer.layers.45.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.45.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.45.feed_forward.up_proj", |
| "vision_tower.transformer.layers.46.attention.k_proj", |
| "vision_tower.transformer.layers.46.attention.o_proj", |
| "vision_tower.transformer.layers.46.attention.q_proj", |
| "vision_tower.transformer.layers.46.attention.qkv_proj", |
| "vision_tower.transformer.layers.46.attention.v_proj", |
| "vision_tower.transformer.layers.46.feed_forward.down_proj", |
| "vision_tower.transformer.layers.46.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.46.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.46.feed_forward.up_proj", |
| "vision_tower.transformer.layers.47.attention.k_proj", |
| "vision_tower.transformer.layers.47.attention.o_proj", |
| "vision_tower.transformer.layers.47.attention.q_proj", |
| "vision_tower.transformer.layers.47.attention.qkv_proj", |
| "vision_tower.transformer.layers.47.attention.v_proj", |
| "vision_tower.transformer.layers.47.feed_forward.down_proj", |
| "vision_tower.transformer.layers.47.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.47.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.47.feed_forward.up_proj", |
| "vision_tower.transformer.layers.5.attention.k_proj", |
| "vision_tower.transformer.layers.5.attention.o_proj", |
| "vision_tower.transformer.layers.5.attention.q_proj", |
| "vision_tower.transformer.layers.5.attention.qkv_proj", |
| "vision_tower.transformer.layers.5.attention.v_proj", |
| "vision_tower.transformer.layers.5.feed_forward.down_proj", |
| "vision_tower.transformer.layers.5.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.5.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.5.feed_forward.up_proj", |
| "vision_tower.transformer.layers.6.attention.k_proj", |
| "vision_tower.transformer.layers.6.attention.o_proj", |
| "vision_tower.transformer.layers.6.attention.q_proj", |
| "vision_tower.transformer.layers.6.attention.qkv_proj", |
| "vision_tower.transformer.layers.6.attention.v_proj", |
| "vision_tower.transformer.layers.6.feed_forward.down_proj", |
| "vision_tower.transformer.layers.6.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.6.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.6.feed_forward.up_proj", |
| "vision_tower.transformer.layers.7.attention.k_proj", |
| "vision_tower.transformer.layers.7.attention.o_proj", |
| "vision_tower.transformer.layers.7.attention.q_proj", |
| "vision_tower.transformer.layers.7.attention.qkv_proj", |
| "vision_tower.transformer.layers.7.attention.v_proj", |
| "vision_tower.transformer.layers.7.feed_forward.down_proj", |
| "vision_tower.transformer.layers.7.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.7.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.7.feed_forward.up_proj", |
| "vision_tower.transformer.layers.8.attention.k_proj", |
| "vision_tower.transformer.layers.8.attention.o_proj", |
| "vision_tower.transformer.layers.8.attention.q_proj", |
| "vision_tower.transformer.layers.8.attention.qkv_proj", |
| "vision_tower.transformer.layers.8.attention.v_proj", |
| "vision_tower.transformer.layers.8.feed_forward.down_proj", |
| "vision_tower.transformer.layers.8.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.8.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.8.feed_forward.up_proj", |
| "vision_tower.transformer.layers.9.attention.k_proj", |
| "vision_tower.transformer.layers.9.attention.o_proj", |
| "vision_tower.transformer.layers.9.attention.q_proj", |
| "vision_tower.transformer.layers.9.attention.qkv_proj", |
| "vision_tower.transformer.layers.9.attention.v_proj", |
| "vision_tower.transformer.layers.9.feed_forward.down_proj", |
| "vision_tower.transformer.layers.9.feed_forward.gate_proj", |
| "vision_tower.transformer.layers.9.feed_forward.gate_up_proj", |
| "vision_tower.transformer.layers.9.feed_forward.up_proj" |
| ], |
| "quantization_status": "compressed" |
| }, |
| "spatial_merge_size": 2, |
| "text_config": { |
| "attention_dropout": 0.0, |
| "bos_token_id": 1, |
| "eos_token_id": 2, |
| "head_dim": 128, |
| "hidden_act": "silu", |
| "hidden_size": 12288, |
| "initializer_range": 0.02, |
| "intermediate_size": 28672, |
| "max_position_embeddings": 262144, |
| "model_type": "ministral3", |
| "num_attention_heads": 96, |
| "num_hidden_layers": 88, |
| "num_key_value_heads": 8, |
| "pad_token_id": 11, |
| "rms_norm_eps": 1e-05, |
| "rope_parameters": { |
| "beta_fast": 4.0, |
| "beta_slow": 1.0, |
| "factor": 64.0, |
| "llama_4_scaling_beta": 0, |
| "mscale": 1.0, |
| "mscale_all_dim": 0.0, |
| "original_max_position_embeddings": 4096, |
| "rope_theta": 1000000.0, |
| "rope_type": "yarn", |
| "type": "yarn" |
| }, |
| "sliding_window": null, |
| "tie_word_embeddings": false, |
| "use_cache": true, |
| "vocab_size": 131072 |
| }, |
| "tie_word_embeddings": false, |
| "transformers_version": "5.6.0.dev0", |
| "vision_config": { |
| "attention_dropout": 0.0, |
| "head_dim": 104, |
| "hidden_act": "silu", |
| "hidden_size": 1664, |
| "image_size": 1540, |
| "initializer_range": 0.02, |
| "intermediate_size": 8192, |
| "model_type": "pixtral", |
| "num_attention_heads": 16, |
| "num_channels": 3, |
| "num_hidden_layers": 48, |
| "patch_size": 14, |
| "rope_parameters": { |
| "rope_theta": 10000.0, |
| "rope_type": "default" |
| } |
| }, |
| "vision_feature_layer": -1 |
| } |