[
  {
    "key": ".*",
    "dtype": "ggml_q6_k",
    "comment": "default use dtype q6_k"
  },
  {
    "key": ".*mlp\\..*norm.*\\.weight",
    "dtype": "float32",
    "comment": "mlp norm uses float32"
  },
  {
    "key": ".*mlp\\.gate\\.weight",
    "dtype": "float32",
    "comment": "gate uses float32"
  },
  {
    "key": ".*mlp\\.(down|gate|up)_proj\\.weight",
    "dtype": "ggml_q4_k",
    "comment": "down gate up proj uses q4_k"
  },
  {
    "key": "model\\.layers\\.[0-9]+\\.self_attn\\.(kv_a_proj_with_mqa|kv_b_proj|o_proj|q_a_proj|q_b_proj)\\.weight.*",
    "dtype": "ggml_q6_k",
    "comment": "self attn uses q6_k"
  },
  {
    "key": "model\\.layers\\.([0-9]|[1-4][0-9]|5[0-5])\\.self_attn\\.(kv_a_proj_with_mqa|kv_b_proj|o_proj|q_a_proj|q_b_proj)\\.weight.*",
    "dtype": "ggml_q4_k",
    "comment": "layers 0-55 self attn uses q4_k"
  },
  {
    "key": "model\\.layers\\.([0-9]|[1-4][0-9]|5[0-8])\\..*experts.*(gate|up|down)_proj.*",
    "dtype": "ggml_q2_k",
    "comment": "layers 0-58 moe use q2_k"
  },
  {
    "key": "model\\.layers\\.59\\..*experts.*(gate|up|down)_proj.*",
    "dtype": "ggml_q3_k",
    "comment": "layer 59 ffn up/gate/down moe use q3_k"
  },
  {
    "key": "model\\.layers\\.60\\..*experts.*(gate|up|down)_proj.*",
    "dtype": "ggml_q4_k",
    "comment": "layer 60 ffn up/gate/down moe use q4_k"
  },
  {
    "key": ".*shared_experts.*(gate|up|down)_proj.*",
    "dtype": "float16",
    "comment": "shared experts use float16"
  }
]