{
  "bits": 4,
  "dynamic": {
    "-:model\\.layers\\.46\\.eh_proj": {},
    "-:.*shared_head.*": {},
    "+:.*shared_experts.*": { "bits": 8 },
    "+:.*self_attn.*": { "bits": 8 },
    "+:model\\.layers\\.([0-3]|4[0-6])\\..*": { "bits": 8 }
  },
  "group_size": 128,
  "desc_act": false,
  "sym": true,
  "lm_head": false,
  "quant_method": "gptq",
  "checkpoint_format": "gptq",
  "pack_dtype": "int32",
  "meta": {
    "quantizer": [ "gptqmodel:5.4.4" ],
    "uri": "https://github.com/modelcloud/gptqmodel",
    "damp_percent": 0.05,
    "damp_auto_increment": 0.01,
    "static_groups": false,
    "true_sequential": true,
    "mse": 0.0,
    "gptaq": false,
    "gptaq_alpha": 0.25,
    "act_group_aware": true
  },
  "pack_impl": "cpu",
  "format": "gptq"
}