[
  {
    "key": ".*",
    "dtype": "ggml_q6_k",
    "comment": "default use dtype q6_k"
  },
  {
    "key": ".*mlp\\..*norm.*\\.weight",
    "dtype": "float32",
    "comment": "mlp norm weights use float32"
  },
  {
    "key": ".*mlp\\.gate\\.weight",
    "dtype": "float32",
    "comment": "gate uses float32"
  },
  {
    "key": ".*mlp\\.(down|gate|up)_proj\\.weight",
    "dtype": "ggml_q4_k",
    "comment": "down gate up proj uses q4_k"
  },
  {
    "key": "model\\.layers\\.[0-9]+\\.self_attn\\.(kv_a_proj_with_mqa|kv_b_proj|o_proj|q_a_proj|q_b_proj)\\.weight.*",
    "dtype": "ggml_q6_k",
    "comment": "self attn uses q6_k"
  },
  {
    "key": "model\\.layers\\.([0-9]|[1-4][0-9]|5[0-5])\\.self_attn\\.(kv_a_proj_with_mqa|kv_b_proj|o_proj|q_a_proj|q_b_proj)\\.weight.*",
    "dtype": "ggml_q4_k",
    "comment": "layers 0-55 self attn uses q4_k"
  },
  {
    "key": "model\\.layers\\.([0-9]|[1-4][0-9]|5[0-8])\\..*experts.*(gate|up|down)_proj.*",
    "dtype": "ggml_q2_k",
    "comment": "layers 0-58 moe use q2_k"
  },
  {
    "key": "model\\.layers\\.59\\..*experts.*(gate|up|down)_proj.*",
    "dtype": "ggml_q3_k",
    "comment": "layer 59 ffn up/gate/down moe use q3_k"
  },
  {
    "key": "model\\.layers\\.60\\..*experts.*(gate|up|down)_proj.*",
    "dtype": "ggml_q4_k",
    "comment": "layer 60 ffn up/gate/down moe use q4_k"
  },
  {
    "key": ".*shared_experts.*(gate|up|down)_proj.*",
    "dtype": "float16",
    "comment": "shared experts use float16"
  }
]