| { |
| "metadata": { |
| "total_size": 124608095324, |
| "total_parameters": 284299262080 |
| }, |
| "weight_map": { |
| "embed.biases": "model-00001-of-00026.safetensors", |
| "embed.scales": "model-00001-of-00026.safetensors", |
| "embed.weight": "model-00001-of-00026.safetensors", |
| "hc_head_base": "model-00026-of-00026.safetensors", |
| "hc_head_fn": "model-00026-of-00026.safetensors", |
| "hc_head_scale": "model-00026-of-00026.safetensors", |
| "head.biases": "model-00026-of-00026.safetensors", |
| "head.scales": "model-00026-of-00026.safetensors", |
| "head.weight": "model-00026-of-00026.safetensors", |
| "layers.0.attn.attn_sink": "model-00001-of-00026.safetensors", |
| "layers.0.attn.kv_norm.weight": "model-00001-of-00026.safetensors", |
| "layers.0.attn.q_norm.weight": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wkv.biases": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wkv.scales": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wkv.weight": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.0.biases": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.0.scales": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.0.weight": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.1.biases": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.1.scales": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.1.weight": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.2.biases": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.2.scales": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.2.weight": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.3.biases": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.3.scales": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.3.weight": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.4.biases": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.4.scales": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.4.weight": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.5.biases": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.5.scales": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.5.weight": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.6.biases": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.6.scales": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.6.weight": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.7.biases": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.7.scales": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_a.7.weight": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_b.biases": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_b.scales": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wo_b.weight": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wq_a.biases": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wq_a.scales": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wq_a.weight": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wq_b.biases": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wq_b.scales": "model-00001-of-00026.safetensors", |
| "layers.0.attn.wq_b.weight": "model-00001-of-00026.safetensors", |
| "layers.0.attn_norm.weight": "model-00001-of-00026.safetensors", |
| "layers.0.ffn.experts.w1.biases": "model-00001-of-00026.safetensors", |
| "layers.0.ffn.experts.w1.scales": "model-00001-of-00026.safetensors", |
| "layers.0.ffn.experts.w1.weight": "model-00001-of-00026.safetensors", |
| "layers.0.ffn.experts.w2.biases": "model-00001-of-00026.safetensors", |
| "layers.0.ffn.experts.w2.scales": "model-00001-of-00026.safetensors", |
| "layers.0.ffn.experts.w2.weight": "model-00001-of-00026.safetensors", |
| "layers.0.ffn.experts.w3.biases": "model-00001-of-00026.safetensors", |
| "layers.0.ffn.experts.w3.scales": "model-00001-of-00026.safetensors", |
| "layers.0.ffn.experts.w3.weight": "model-00001-of-00026.safetensors", |
| "layers.0.ffn.gate.tid2eid": "model-00001-of-00026.safetensors", |
| "layers.0.ffn.gate.weight": "model-00001-of-00026.safetensors", |
| "layers.0.ffn.shared_experts.w1.biases": "model-00001-of-00026.safetensors", |
| "layers.0.ffn.shared_experts.w1.scales": "model-00001-of-00026.safetensors", |
| "layers.0.ffn.shared_experts.w1.weight": "model-00001-of-00026.safetensors", |
| "layers.0.ffn.shared_experts.w2.biases": "model-00001-of-00026.safetensors", |
| "layers.0.ffn.shared_experts.w2.scales": "model-00001-of-00026.safetensors", |
| "layers.0.ffn.shared_experts.w2.weight": "model-00001-of-00026.safetensors", |
| "layers.0.ffn.shared_experts.w3.biases": "model-00001-of-00026.safetensors", |
| "layers.0.ffn.shared_experts.w3.scales": "model-00001-of-00026.safetensors", |
| "layers.0.ffn.shared_experts.w3.weight": "model-00001-of-00026.safetensors", |
| "layers.0.ffn_norm.weight": "model-00001-of-00026.safetensors", |
| "layers.0.hc_attn_base": "model-00001-of-00026.safetensors", |
| "layers.0.hc_attn_fn": "model-00001-of-00026.safetensors", |
| "layers.0.hc_attn_scale": "model-00001-of-00026.safetensors", |
| "layers.0.hc_ffn_base": "model-00001-of-00026.safetensors", |
| "layers.0.hc_ffn_fn": "model-00001-of-00026.safetensors", |
| "layers.0.hc_ffn_scale": "model-00001-of-00026.safetensors", |
| "layers.1.attn.attn_sink": "model-00001-of-00026.safetensors", |
| "layers.1.attn.kv_norm.weight": "model-00001-of-00026.safetensors", |
| "layers.1.attn.q_norm.weight": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wkv.biases": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wkv.scales": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wkv.weight": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.0.biases": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.0.scales": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.0.weight": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.1.biases": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.1.scales": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.1.weight": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.2.biases": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.2.scales": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.2.weight": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.3.biases": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.3.scales": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.3.weight": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.4.biases": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.4.scales": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.4.weight": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.5.biases": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.5.scales": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.5.weight": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.6.biases": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.6.scales": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.6.weight": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.7.biases": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.7.scales": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_a.7.weight": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_b.biases": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_b.scales": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wo_b.weight": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wq_a.biases": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wq_a.scales": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wq_a.weight": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wq_b.biases": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wq_b.scales": "model-00001-of-00026.safetensors", |
| "layers.1.attn.wq_b.weight": "model-00001-of-00026.safetensors", |
| "layers.1.attn_norm.weight": "model-00002-of-00026.safetensors", |
| "layers.1.ffn.experts.w1.biases": "model-00001-of-00026.safetensors", |
| "layers.1.ffn.experts.w1.scales": "model-00001-of-00026.safetensors", |
| "layers.1.ffn.experts.w1.weight": "model-00001-of-00026.safetensors", |
| "layers.1.ffn.experts.w2.biases": "model-00001-of-00026.safetensors", |
| "layers.1.ffn.experts.w2.scales": "model-00001-of-00026.safetensors", |
| "layers.1.ffn.experts.w2.weight": "model-00001-of-00026.safetensors", |
| "layers.1.ffn.experts.w3.biases": "model-00002-of-00026.safetensors", |
| "layers.1.ffn.experts.w3.scales": "model-00002-of-00026.safetensors", |
| "layers.1.ffn.experts.w3.weight": "model-00002-of-00026.safetensors", |
| "layers.1.ffn.gate.tid2eid": "model-00001-of-00026.safetensors", |
| "layers.1.ffn.gate.weight": "model-00001-of-00026.safetensors", |
| "layers.1.ffn.shared_experts.w1.biases": "model-00002-of-00026.safetensors", |
| "layers.1.ffn.shared_experts.w1.scales": "model-00002-of-00026.safetensors", |
| "layers.1.ffn.shared_experts.w1.weight": "model-00002-of-00026.safetensors", |
| "layers.1.ffn.shared_experts.w2.biases": "model-00002-of-00026.safetensors", |
| "layers.1.ffn.shared_experts.w2.scales": "model-00002-of-00026.safetensors", |
| "layers.1.ffn.shared_experts.w2.weight": "model-00002-of-00026.safetensors", |
| "layers.1.ffn.shared_experts.w3.biases": "model-00002-of-00026.safetensors", |
| "layers.1.ffn.shared_experts.w3.scales": "model-00002-of-00026.safetensors", |
| "layers.1.ffn.shared_experts.w3.weight": "model-00002-of-00026.safetensors", |
| "layers.1.ffn_norm.weight": "model-00002-of-00026.safetensors", |
| "layers.1.hc_attn_base": "model-00002-of-00026.safetensors", |
| "layers.1.hc_attn_fn": "model-00002-of-00026.safetensors", |
| "layers.1.hc_attn_scale": "model-00002-of-00026.safetensors", |
| "layers.1.hc_ffn_base": "model-00002-of-00026.safetensors", |
| "layers.1.hc_ffn_fn": "model-00002-of-00026.safetensors", |
| "layers.1.hc_ffn_scale": "model-00002-of-00026.safetensors", |
| "layers.10.attn.attn_sink": "model-00006-of-00026.safetensors", |
| "layers.10.attn.compressor.ape": "model-00006-of-00026.safetensors", |
| "layers.10.attn.compressor.norm.weight": "model-00006-of-00026.safetensors", |
| "layers.10.attn.compressor.wgate.biases": "model-00006-of-00026.safetensors", |
| "layers.10.attn.compressor.wgate.scales": "model-00006-of-00026.safetensors", |
| "layers.10.attn.compressor.wgate.weight": "model-00006-of-00026.safetensors", |
| "layers.10.attn.compressor.wkv.biases": "model-00006-of-00026.safetensors", |
| "layers.10.attn.compressor.wkv.scales": "model-00006-of-00026.safetensors", |
| "layers.10.attn.compressor.wkv.weight": "model-00006-of-00026.safetensors", |
| "layers.10.attn.indexer.compressor.ape": "model-00006-of-00026.safetensors", |
| "layers.10.attn.indexer.compressor.norm.weight": "model-00006-of-00026.safetensors", |
| "layers.10.attn.indexer.compressor.wgate.biases": "model-00006-of-00026.safetensors", |
| "layers.10.attn.indexer.compressor.wgate.scales": "model-00006-of-00026.safetensors", |
| "layers.10.attn.indexer.compressor.wgate.weight": "model-00006-of-00026.safetensors", |
| "layers.10.attn.indexer.compressor.wkv.biases": "model-00006-of-00026.safetensors", |
| "layers.10.attn.indexer.compressor.wkv.scales": "model-00006-of-00026.safetensors", |
| "layers.10.attn.indexer.compressor.wkv.weight": "model-00006-of-00026.safetensors", |
| "layers.10.attn.indexer.weights_proj.biases": "model-00006-of-00026.safetensors", |
| "layers.10.attn.indexer.weights_proj.scales": "model-00006-of-00026.safetensors", |
| "layers.10.attn.indexer.weights_proj.weight": "model-00006-of-00026.safetensors", |
| "layers.10.attn.indexer.wq_b.biases": "model-00006-of-00026.safetensors", |
| "layers.10.attn.indexer.wq_b.scales": "model-00006-of-00026.safetensors", |
| "layers.10.attn.indexer.wq_b.weight": "model-00006-of-00026.safetensors", |
| "layers.10.attn.kv_norm.weight": "model-00006-of-00026.safetensors", |
| "layers.10.attn.q_norm.weight": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wkv.biases": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wkv.scales": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wkv.weight": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.0.biases": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.0.scales": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.0.weight": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.1.biases": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.1.scales": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.1.weight": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.2.biases": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.2.scales": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.2.weight": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.3.biases": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.3.scales": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.3.weight": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.4.biases": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.4.scales": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.4.weight": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.5.biases": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.5.scales": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.5.weight": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.6.biases": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.6.scales": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.6.weight": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.7.biases": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.7.scales": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_a.7.weight": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_b.biases": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_b.scales": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wo_b.weight": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wq_a.biases": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wq_a.scales": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wq_a.weight": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wq_b.biases": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wq_b.scales": "model-00006-of-00026.safetensors", |
| "layers.10.attn.wq_b.weight": "model-00006-of-00026.safetensors", |
| "layers.10.attn_norm.weight": "model-00007-of-00026.safetensors", |
| "layers.10.ffn.experts.w1.biases": "model-00007-of-00026.safetensors", |
| "layers.10.ffn.experts.w1.scales": "model-00007-of-00026.safetensors", |
| "layers.10.ffn.experts.w1.weight": "model-00007-of-00026.safetensors", |
| "layers.10.ffn.experts.w2.biases": "model-00007-of-00026.safetensors", |
| "layers.10.ffn.experts.w2.scales": "model-00007-of-00026.safetensors", |
| "layers.10.ffn.experts.w2.weight": "model-00007-of-00026.safetensors", |
| "layers.10.ffn.experts.w3.biases": "model-00007-of-00026.safetensors", |
| "layers.10.ffn.experts.w3.scales": "model-00007-of-00026.safetensors", |
| "layers.10.ffn.experts.w3.weight": "model-00007-of-00026.safetensors", |
| "layers.10.ffn.gate.bias": "model-00006-of-00026.safetensors", |
| "layers.10.ffn.gate.weight": "model-00006-of-00026.safetensors", |
| "layers.10.ffn.shared_experts.w1.biases": "model-00007-of-00026.safetensors", |
| "layers.10.ffn.shared_experts.w1.scales": "model-00007-of-00026.safetensors", |
| "layers.10.ffn.shared_experts.w1.weight": "model-00007-of-00026.safetensors", |
| "layers.10.ffn.shared_experts.w2.biases": "model-00007-of-00026.safetensors", |
| "layers.10.ffn.shared_experts.w2.scales": "model-00007-of-00026.safetensors", |
| "layers.10.ffn.shared_experts.w2.weight": "model-00007-of-00026.safetensors", |
| "layers.10.ffn.shared_experts.w3.biases": "model-00007-of-00026.safetensors", |
| "layers.10.ffn.shared_experts.w3.scales": "model-00007-of-00026.safetensors", |
| "layers.10.ffn.shared_experts.w3.weight": "model-00007-of-00026.safetensors", |
| "layers.10.ffn_norm.weight": "model-00007-of-00026.safetensors", |
| "layers.10.hc_attn_base": "model-00007-of-00026.safetensors", |
| "layers.10.hc_attn_fn": "model-00007-of-00026.safetensors", |
| "layers.10.hc_attn_scale": "model-00007-of-00026.safetensors", |
| "layers.10.hc_ffn_base": "model-00007-of-00026.safetensors", |
| "layers.10.hc_ffn_fn": "model-00007-of-00026.safetensors", |
| "layers.10.hc_ffn_scale": "model-00007-of-00026.safetensors", |
| "layers.11.attn.attn_sink": "model-00007-of-00026.safetensors", |
| "layers.11.attn.compressor.ape": "model-00007-of-00026.safetensors", |
| "layers.11.attn.compressor.norm.weight": "model-00007-of-00026.safetensors", |
| "layers.11.attn.compressor.wgate.biases": "model-00007-of-00026.safetensors", |
| "layers.11.attn.compressor.wgate.scales": "model-00007-of-00026.safetensors", |
| "layers.11.attn.compressor.wgate.weight": "model-00007-of-00026.safetensors", |
| "layers.11.attn.compressor.wkv.biases": "model-00007-of-00026.safetensors", |
| "layers.11.attn.compressor.wkv.scales": "model-00007-of-00026.safetensors", |
| "layers.11.attn.compressor.wkv.weight": "model-00007-of-00026.safetensors", |
| "layers.11.attn.kv_norm.weight": "model-00007-of-00026.safetensors", |
| "layers.11.attn.q_norm.weight": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wkv.biases": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wkv.scales": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wkv.weight": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.0.biases": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.0.scales": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.0.weight": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.1.biases": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.1.scales": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.1.weight": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.2.biases": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.2.scales": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.2.weight": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.3.biases": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.3.scales": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.3.weight": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.4.biases": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.4.scales": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.4.weight": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.5.biases": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.5.scales": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.5.weight": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.6.biases": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.6.scales": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.6.weight": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.7.biases": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.7.scales": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_a.7.weight": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_b.biases": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_b.scales": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wo_b.weight": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wq_a.biases": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wq_a.scales": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wq_a.weight": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wq_b.biases": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wq_b.scales": "model-00007-of-00026.safetensors", |
| "layers.11.attn.wq_b.weight": "model-00007-of-00026.safetensors", |
| "layers.11.attn_norm.weight": "model-00008-of-00026.safetensors", |
| "layers.11.ffn.experts.w1.biases": "model-00007-of-00026.safetensors", |
| "layers.11.ffn.experts.w1.scales": "model-00007-of-00026.safetensors", |
| "layers.11.ffn.experts.w1.weight": "model-00007-of-00026.safetensors", |
| "layers.11.ffn.experts.w2.biases": "model-00007-of-00026.safetensors", |
| "layers.11.ffn.experts.w2.scales": "model-00007-of-00026.safetensors", |
| "layers.11.ffn.experts.w2.weight": "model-00007-of-00026.safetensors", |
| "layers.11.ffn.experts.w3.biases": "model-00008-of-00026.safetensors", |
| "layers.11.ffn.experts.w3.scales": "model-00008-of-00026.safetensors", |
| "layers.11.ffn.experts.w3.weight": "model-00008-of-00026.safetensors", |
| "layers.11.ffn.gate.bias": "model-00007-of-00026.safetensors", |
| "layers.11.ffn.gate.weight": "model-00007-of-00026.safetensors", |
| "layers.11.ffn.shared_experts.w1.biases": "model-00008-of-00026.safetensors", |
| "layers.11.ffn.shared_experts.w1.scales": "model-00008-of-00026.safetensors", |
| "layers.11.ffn.shared_experts.w1.weight": "model-00008-of-00026.safetensors", |
| "layers.11.ffn.shared_experts.w2.biases": "model-00008-of-00026.safetensors", |
| "layers.11.ffn.shared_experts.w2.scales": "model-00008-of-00026.safetensors", |
| "layers.11.ffn.shared_experts.w2.weight": "model-00008-of-00026.safetensors", |
| "layers.11.ffn.shared_experts.w3.biases": "model-00008-of-00026.safetensors", |
| "layers.11.ffn.shared_experts.w3.scales": "model-00008-of-00026.safetensors", |
| "layers.11.ffn.shared_experts.w3.weight": "model-00008-of-00026.safetensors", |
| "layers.11.ffn_norm.weight": "model-00008-of-00026.safetensors", |
| "layers.11.hc_attn_base": "model-00008-of-00026.safetensors", |
| "layers.11.hc_attn_fn": "model-00008-of-00026.safetensors", |
| "layers.11.hc_attn_scale": "model-00008-of-00026.safetensors", |
| "layers.11.hc_ffn_base": "model-00008-of-00026.safetensors", |
| "layers.11.hc_ffn_fn": "model-00008-of-00026.safetensors", |
| "layers.11.hc_ffn_scale": "model-00008-of-00026.safetensors", |
| "layers.12.attn.attn_sink": "model-00008-of-00026.safetensors", |
| "layers.12.attn.compressor.ape": "model-00008-of-00026.safetensors", |
| "layers.12.attn.compressor.norm.weight": "model-00008-of-00026.safetensors", |
| "layers.12.attn.compressor.wgate.biases": "model-00008-of-00026.safetensors", |
| "layers.12.attn.compressor.wgate.scales": "model-00008-of-00026.safetensors", |
| "layers.12.attn.compressor.wgate.weight": "model-00008-of-00026.safetensors", |
| "layers.12.attn.compressor.wkv.biases": "model-00008-of-00026.safetensors", |
| "layers.12.attn.compressor.wkv.scales": "model-00008-of-00026.safetensors", |
| "layers.12.attn.compressor.wkv.weight": "model-00008-of-00026.safetensors", |
| "layers.12.attn.indexer.compressor.ape": "model-00008-of-00026.safetensors", |
| "layers.12.attn.indexer.compressor.norm.weight": "model-00008-of-00026.safetensors", |
| "layers.12.attn.indexer.compressor.wgate.biases": "model-00008-of-00026.safetensors", |
| "layers.12.attn.indexer.compressor.wgate.scales": "model-00008-of-00026.safetensors", |
| "layers.12.attn.indexer.compressor.wgate.weight": "model-00008-of-00026.safetensors", |
| "layers.12.attn.indexer.compressor.wkv.biases": "model-00008-of-00026.safetensors", |
| "layers.12.attn.indexer.compressor.wkv.scales": "model-00008-of-00026.safetensors", |
| "layers.12.attn.indexer.compressor.wkv.weight": "model-00008-of-00026.safetensors", |
| "layers.12.attn.indexer.weights_proj.biases": "model-00008-of-00026.safetensors", |
| "layers.12.attn.indexer.weights_proj.scales": "model-00008-of-00026.safetensors", |
| "layers.12.attn.indexer.weights_proj.weight": "model-00008-of-00026.safetensors", |
| "layers.12.attn.indexer.wq_b.biases": "model-00008-of-00026.safetensors", |
| "layers.12.attn.indexer.wq_b.scales": "model-00008-of-00026.safetensors", |
| "layers.12.attn.indexer.wq_b.weight": "model-00008-of-00026.safetensors", |
| "layers.12.attn.kv_norm.weight": "model-00008-of-00026.safetensors", |
| "layers.12.attn.q_norm.weight": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wkv.biases": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wkv.scales": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wkv.weight": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.0.biases": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.0.scales": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.0.weight": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.1.biases": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.1.scales": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.1.weight": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.2.biases": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.2.scales": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.2.weight": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.3.biases": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.3.scales": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.3.weight": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.4.biases": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.4.scales": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.4.weight": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.5.biases": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.5.scales": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.5.weight": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.6.biases": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.6.scales": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.6.weight": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.7.biases": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.7.scales": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_a.7.weight": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_b.biases": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_b.scales": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wo_b.weight": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wq_a.biases": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wq_a.scales": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wq_a.weight": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wq_b.biases": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wq_b.scales": "model-00008-of-00026.safetensors", |
| "layers.12.attn.wq_b.weight": "model-00008-of-00026.safetensors", |
| "layers.12.attn_norm.weight": "model-00008-of-00026.safetensors", |
| "layers.12.ffn.experts.w1.biases": "model-00008-of-00026.safetensors", |
| "layers.12.ffn.experts.w1.scales": "model-00008-of-00026.safetensors", |
| "layers.12.ffn.experts.w1.weight": "model-00008-of-00026.safetensors", |
| "layers.12.ffn.experts.w2.biases": "model-00008-of-00026.safetensors", |
| "layers.12.ffn.experts.w2.scales": "model-00008-of-00026.safetensors", |
| "layers.12.ffn.experts.w2.weight": "model-00008-of-00026.safetensors", |
| "layers.12.ffn.experts.w3.biases": "model-00008-of-00026.safetensors", |
| "layers.12.ffn.experts.w3.scales": "model-00008-of-00026.safetensors", |
| "layers.12.ffn.experts.w3.weight": "model-00008-of-00026.safetensors", |
| "layers.12.ffn.gate.bias": "model-00008-of-00026.safetensors", |
| "layers.12.ffn.gate.weight": "model-00008-of-00026.safetensors", |
| "layers.12.ffn.shared_experts.w1.biases": "model-00008-of-00026.safetensors", |
| "layers.12.ffn.shared_experts.w1.scales": "model-00008-of-00026.safetensors", |
| "layers.12.ffn.shared_experts.w1.weight": "model-00008-of-00026.safetensors", |
| "layers.12.ffn.shared_experts.w2.biases": "model-00008-of-00026.safetensors", |
| "layers.12.ffn.shared_experts.w2.scales": "model-00008-of-00026.safetensors", |
| "layers.12.ffn.shared_experts.w2.weight": "model-00008-of-00026.safetensors", |
| "layers.12.ffn.shared_experts.w3.biases": "model-00008-of-00026.safetensors", |
| "layers.12.ffn.shared_experts.w3.scales": "model-00008-of-00026.safetensors", |
| "layers.12.ffn.shared_experts.w3.weight": "model-00008-of-00026.safetensors", |
| "layers.12.ffn_norm.weight": "model-00008-of-00026.safetensors", |
| "layers.12.hc_attn_base": "model-00008-of-00026.safetensors", |
| "layers.12.hc_attn_fn": "model-00008-of-00026.safetensors", |
| "layers.12.hc_attn_scale": "model-00008-of-00026.safetensors", |
| "layers.12.hc_ffn_base": "model-00008-of-00026.safetensors", |
| "layers.12.hc_ffn_fn": "model-00008-of-00026.safetensors", |
| "layers.12.hc_ffn_scale": "model-00008-of-00026.safetensors", |
| "layers.13.attn.attn_sink": "model-00008-of-00026.safetensors", |
| "layers.13.attn.compressor.ape": "model-00008-of-00026.safetensors", |
| "layers.13.attn.compressor.norm.weight": "model-00008-of-00026.safetensors", |
| "layers.13.attn.compressor.wgate.biases": "model-00008-of-00026.safetensors", |
| "layers.13.attn.compressor.wgate.scales": "model-00008-of-00026.safetensors", |
| "layers.13.attn.compressor.wgate.weight": "model-00008-of-00026.safetensors", |
| "layers.13.attn.compressor.wkv.biases": "model-00008-of-00026.safetensors", |
| "layers.13.attn.compressor.wkv.scales": "model-00008-of-00026.safetensors", |
| "layers.13.attn.compressor.wkv.weight": "model-00008-of-00026.safetensors", |
| "layers.13.attn.kv_norm.weight": "model-00008-of-00026.safetensors", |
| "layers.13.attn.q_norm.weight": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wkv.biases": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wkv.scales": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wkv.weight": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.0.biases": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.0.scales": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.0.weight": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.1.biases": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.1.scales": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.1.weight": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.2.biases": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.2.scales": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.2.weight": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.3.biases": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.3.scales": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.3.weight": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.4.biases": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.4.scales": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.4.weight": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.5.biases": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.5.scales": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.5.weight": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.6.biases": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.6.scales": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.6.weight": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.7.biases": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.7.scales": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_a.7.weight": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_b.biases": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_b.scales": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wo_b.weight": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wq_a.biases": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wq_a.scales": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wq_a.weight": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wq_b.biases": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wq_b.scales": "model-00008-of-00026.safetensors", |
| "layers.13.attn.wq_b.weight": "model-00008-of-00026.safetensors", |
| "layers.13.attn_norm.weight": "model-00009-of-00026.safetensors", |
| "layers.13.ffn.experts.w1.biases": "model-00008-of-00026.safetensors", |
| "layers.13.ffn.experts.w1.scales": "model-00008-of-00026.safetensors", |
| "layers.13.ffn.experts.w1.weight": "model-00008-of-00026.safetensors", |
| "layers.13.ffn.experts.w2.biases": "model-00009-of-00026.safetensors", |
| "layers.13.ffn.experts.w2.scales": "model-00009-of-00026.safetensors", |
| "layers.13.ffn.experts.w2.weight": "model-00009-of-00026.safetensors", |
| "layers.13.ffn.experts.w3.biases": "model-00009-of-00026.safetensors", |
| "layers.13.ffn.experts.w3.scales": "model-00009-of-00026.safetensors", |
| "layers.13.ffn.experts.w3.weight": "model-00009-of-00026.safetensors", |
| "layers.13.ffn.gate.bias": "model-00008-of-00026.safetensors", |
| "layers.13.ffn.gate.weight": "model-00008-of-00026.safetensors", |
| "layers.13.ffn.shared_experts.w1.biases": "model-00009-of-00026.safetensors", |
| "layers.13.ffn.shared_experts.w1.scales": "model-00009-of-00026.safetensors", |
| "layers.13.ffn.shared_experts.w1.weight": "model-00009-of-00026.safetensors", |
| "layers.13.ffn.shared_experts.w2.biases": "model-00009-of-00026.safetensors", |
| "layers.13.ffn.shared_experts.w2.scales": "model-00009-of-00026.safetensors", |
| "layers.13.ffn.shared_experts.w2.weight": "model-00009-of-00026.safetensors", |
| "layers.13.ffn.shared_experts.w3.biases": "model-00009-of-00026.safetensors", |
| "layers.13.ffn.shared_experts.w3.scales": "model-00009-of-00026.safetensors", |
| "layers.13.ffn.shared_experts.w3.weight": "model-00009-of-00026.safetensors", |
| "layers.13.ffn_norm.weight": "model-00009-of-00026.safetensors", |
| "layers.13.hc_attn_base": "model-00009-of-00026.safetensors", |
| "layers.13.hc_attn_fn": "model-00009-of-00026.safetensors", |
| "layers.13.hc_attn_scale": "model-00009-of-00026.safetensors", |
| "layers.13.hc_ffn_base": "model-00009-of-00026.safetensors", |
| "layers.13.hc_ffn_fn": "model-00009-of-00026.safetensors", |
| "layers.13.hc_ffn_scale": "model-00009-of-00026.safetensors", |
| "layers.14.attn.attn_sink": "model-00009-of-00026.safetensors", |
| "layers.14.attn.compressor.ape": "model-00009-of-00026.safetensors", |
| "layers.14.attn.compressor.norm.weight": "model-00009-of-00026.safetensors", |
| "layers.14.attn.compressor.wgate.biases": "model-00009-of-00026.safetensors", |
| "layers.14.attn.compressor.wgate.scales": "model-00009-of-00026.safetensors", |
| "layers.14.attn.compressor.wgate.weight": "model-00009-of-00026.safetensors", |
| "layers.14.attn.compressor.wkv.biases": "model-00009-of-00026.safetensors", |
| "layers.14.attn.compressor.wkv.scales": "model-00009-of-00026.safetensors", |
| "layers.14.attn.compressor.wkv.weight": "model-00009-of-00026.safetensors", |
| "layers.14.attn.indexer.compressor.ape": "model-00009-of-00026.safetensors", |
| "layers.14.attn.indexer.compressor.norm.weight": "model-00009-of-00026.safetensors", |
| "layers.14.attn.indexer.compressor.wgate.biases": "model-00009-of-00026.safetensors", |
| "layers.14.attn.indexer.compressor.wgate.scales": "model-00009-of-00026.safetensors", |
| "layers.14.attn.indexer.compressor.wgate.weight": "model-00009-of-00026.safetensors", |
| "layers.14.attn.indexer.compressor.wkv.biases": "model-00009-of-00026.safetensors", |
| "layers.14.attn.indexer.compressor.wkv.scales": "model-00009-of-00026.safetensors", |
| "layers.14.attn.indexer.compressor.wkv.weight": "model-00009-of-00026.safetensors", |
| "layers.14.attn.indexer.weights_proj.biases": "model-00009-of-00026.safetensors", |
| "layers.14.attn.indexer.weights_proj.scales": "model-00009-of-00026.safetensors", |
| "layers.14.attn.indexer.weights_proj.weight": "model-00009-of-00026.safetensors", |
| "layers.14.attn.indexer.wq_b.biases": "model-00009-of-00026.safetensors", |
| "layers.14.attn.indexer.wq_b.scales": "model-00009-of-00026.safetensors", |
| "layers.14.attn.indexer.wq_b.weight": "model-00009-of-00026.safetensors", |
| "layers.14.attn.kv_norm.weight": "model-00009-of-00026.safetensors", |
| "layers.14.attn.q_norm.weight": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wkv.biases": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wkv.scales": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wkv.weight": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.0.biases": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.0.scales": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.0.weight": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.1.biases": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.1.scales": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.1.weight": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.2.biases": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.2.scales": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.2.weight": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.3.biases": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.3.scales": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.3.weight": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.4.biases": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.4.scales": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.4.weight": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.5.biases": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.5.scales": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.5.weight": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.6.biases": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.6.scales": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.6.weight": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.7.biases": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.7.scales": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_a.7.weight": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_b.biases": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_b.scales": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wo_b.weight": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wq_a.biases": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wq_a.scales": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wq_a.weight": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wq_b.biases": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wq_b.scales": "model-00009-of-00026.safetensors", |
| "layers.14.attn.wq_b.weight": "model-00009-of-00026.safetensors", |
| "layers.14.attn_norm.weight": "model-00009-of-00026.safetensors", |
| "layers.14.ffn.experts.w1.biases": "model-00009-of-00026.safetensors", |
| "layers.14.ffn.experts.w1.scales": "model-00009-of-00026.safetensors", |
| "layers.14.ffn.experts.w1.weight": "model-00009-of-00026.safetensors", |
| "layers.14.ffn.experts.w2.biases": "model-00009-of-00026.safetensors", |
| "layers.14.ffn.experts.w2.scales": "model-00009-of-00026.safetensors", |
| "layers.14.ffn.experts.w2.weight": "model-00009-of-00026.safetensors", |
| "layers.14.ffn.experts.w3.biases": "model-00009-of-00026.safetensors", |
| "layers.14.ffn.experts.w3.scales": "model-00009-of-00026.safetensors", |
| "layers.14.ffn.experts.w3.weight": "model-00009-of-00026.safetensors", |
| "layers.14.ffn.gate.bias": "model-00009-of-00026.safetensors", |
| "layers.14.ffn.gate.weight": "model-00009-of-00026.safetensors", |
| "layers.14.ffn.shared_experts.w1.biases": "model-00009-of-00026.safetensors", |
| "layers.14.ffn.shared_experts.w1.scales": "model-00009-of-00026.safetensors", |
| "layers.14.ffn.shared_experts.w1.weight": "model-00009-of-00026.safetensors", |
| "layers.14.ffn.shared_experts.w2.biases": "model-00009-of-00026.safetensors", |
| "layers.14.ffn.shared_experts.w2.scales": "model-00009-of-00026.safetensors", |
| "layers.14.ffn.shared_experts.w2.weight": "model-00009-of-00026.safetensors", |
| "layers.14.ffn.shared_experts.w3.biases": "model-00009-of-00026.safetensors", |
| "layers.14.ffn.shared_experts.w3.scales": "model-00009-of-00026.safetensors", |
| "layers.14.ffn.shared_experts.w3.weight": "model-00009-of-00026.safetensors", |
| "layers.14.ffn_norm.weight": "model-00009-of-00026.safetensors", |
| "layers.14.hc_attn_base": "model-00009-of-00026.safetensors", |
| "layers.14.hc_attn_fn": "model-00009-of-00026.safetensors", |
| "layers.14.hc_attn_scale": "model-00009-of-00026.safetensors", |
| "layers.14.hc_ffn_base": "model-00009-of-00026.safetensors", |
| "layers.14.hc_ffn_fn": "model-00009-of-00026.safetensors", |
| "layers.14.hc_ffn_scale": "model-00009-of-00026.safetensors", |
| "layers.15.attn.attn_sink": "model-00009-of-00026.safetensors", |
| "layers.15.attn.compressor.ape": "model-00009-of-00026.safetensors", |
| "layers.15.attn.compressor.norm.weight": "model-00009-of-00026.safetensors", |
| "layers.15.attn.compressor.wgate.biases": "model-00009-of-00026.safetensors", |
| "layers.15.attn.compressor.wgate.scales": "model-00009-of-00026.safetensors", |
| "layers.15.attn.compressor.wgate.weight": "model-00009-of-00026.safetensors", |
| "layers.15.attn.compressor.wkv.biases": "model-00009-of-00026.safetensors", |
| "layers.15.attn.compressor.wkv.scales": "model-00009-of-00026.safetensors", |
| "layers.15.attn.compressor.wkv.weight": "model-00009-of-00026.safetensors", |
| "layers.15.attn.kv_norm.weight": "model-00009-of-00026.safetensors", |
| "layers.15.attn.q_norm.weight": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wkv.biases": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wkv.scales": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wkv.weight": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.0.biases": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.0.scales": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.0.weight": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.1.biases": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.1.scales": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.1.weight": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.2.biases": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.2.scales": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.2.weight": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.3.biases": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.3.scales": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.3.weight": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.4.biases": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.4.scales": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.4.weight": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.5.biases": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.5.scales": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.5.weight": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.6.biases": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.6.scales": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.6.weight": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.7.biases": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.7.scales": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_a.7.weight": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_b.biases": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_b.scales": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wo_b.weight": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wq_a.biases": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wq_a.scales": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wq_a.weight": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wq_b.biases": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wq_b.scales": "model-00009-of-00026.safetensors", |
| "layers.15.attn.wq_b.weight": "model-00009-of-00026.safetensors", |
| "layers.15.attn_norm.weight": "model-00010-of-00026.safetensors", |
| "layers.15.ffn.experts.w1.biases": "model-00010-of-00026.safetensors", |
| "layers.15.ffn.experts.w1.scales": "model-00010-of-00026.safetensors", |
| "layers.15.ffn.experts.w1.weight": "model-00010-of-00026.safetensors", |
| "layers.15.ffn.experts.w2.biases": "model-00010-of-00026.safetensors", |
| "layers.15.ffn.experts.w2.scales": "model-00010-of-00026.safetensors", |
| "layers.15.ffn.experts.w2.weight": "model-00010-of-00026.safetensors", |
| "layers.15.ffn.experts.w3.biases": "model-00010-of-00026.safetensors", |
| "layers.15.ffn.experts.w3.scales": "model-00010-of-00026.safetensors", |
| "layers.15.ffn.experts.w3.weight": "model-00010-of-00026.safetensors", |
| "layers.15.ffn.gate.bias": "model-00009-of-00026.safetensors", |
| "layers.15.ffn.gate.weight": "model-00009-of-00026.safetensors", |
| "layers.15.ffn.shared_experts.w1.biases": "model-00010-of-00026.safetensors", |
| "layers.15.ffn.shared_experts.w1.scales": "model-00010-of-00026.safetensors", |
| "layers.15.ffn.shared_experts.w1.weight": "model-00010-of-00026.safetensors", |
| "layers.15.ffn.shared_experts.w2.biases": "model-00010-of-00026.safetensors", |
| "layers.15.ffn.shared_experts.w2.scales": "model-00010-of-00026.safetensors", |
| "layers.15.ffn.shared_experts.w2.weight": "model-00010-of-00026.safetensors", |
| "layers.15.ffn.shared_experts.w3.biases": "model-00010-of-00026.safetensors", |
| "layers.15.ffn.shared_experts.w3.scales": "model-00010-of-00026.safetensors", |
| "layers.15.ffn.shared_experts.w3.weight": "model-00010-of-00026.safetensors", |
| "layers.15.ffn_norm.weight": "model-00010-of-00026.safetensors", |
| "layers.15.hc_attn_base": "model-00010-of-00026.safetensors", |
| "layers.15.hc_attn_fn": "model-00010-of-00026.safetensors", |
| "layers.15.hc_attn_scale": "model-00010-of-00026.safetensors", |
| "layers.15.hc_ffn_base": "model-00010-of-00026.safetensors", |
| "layers.15.hc_ffn_fn": "model-00010-of-00026.safetensors", |
| "layers.15.hc_ffn_scale": "model-00010-of-00026.safetensors", |
| "layers.16.attn.attn_sink": "model-00010-of-00026.safetensors", |
| "layers.16.attn.compressor.ape": "model-00010-of-00026.safetensors", |
| "layers.16.attn.compressor.norm.weight": "model-00010-of-00026.safetensors", |
| "layers.16.attn.compressor.wgate.biases": "model-00010-of-00026.safetensors", |
| "layers.16.attn.compressor.wgate.scales": "model-00010-of-00026.safetensors", |
| "layers.16.attn.compressor.wgate.weight": "model-00010-of-00026.safetensors", |
| "layers.16.attn.compressor.wkv.biases": "model-00010-of-00026.safetensors", |
| "layers.16.attn.compressor.wkv.scales": "model-00010-of-00026.safetensors", |
| "layers.16.attn.compressor.wkv.weight": "model-00010-of-00026.safetensors", |
| "layers.16.attn.indexer.compressor.ape": "model-00010-of-00026.safetensors", |
| "layers.16.attn.indexer.compressor.norm.weight": "model-00010-of-00026.safetensors", |
| "layers.16.attn.indexer.compressor.wgate.biases": "model-00010-of-00026.safetensors", |
| "layers.16.attn.indexer.compressor.wgate.scales": "model-00010-of-00026.safetensors", |
| "layers.16.attn.indexer.compressor.wgate.weight": "model-00010-of-00026.safetensors", |
| "layers.16.attn.indexer.compressor.wkv.biases": "model-00010-of-00026.safetensors", |
| "layers.16.attn.indexer.compressor.wkv.scales": "model-00010-of-00026.safetensors", |
| "layers.16.attn.indexer.compressor.wkv.weight": "model-00010-of-00026.safetensors", |
| "layers.16.attn.indexer.weights_proj.biases": "model-00010-of-00026.safetensors", |
| "layers.16.attn.indexer.weights_proj.scales": "model-00010-of-00026.safetensors", |
| "layers.16.attn.indexer.weights_proj.weight": "model-00010-of-00026.safetensors", |
| "layers.16.attn.indexer.wq_b.biases": "model-00010-of-00026.safetensors", |
| "layers.16.attn.indexer.wq_b.scales": "model-00010-of-00026.safetensors", |
| "layers.16.attn.indexer.wq_b.weight": "model-00010-of-00026.safetensors", |
| "layers.16.attn.kv_norm.weight": "model-00010-of-00026.safetensors", |
| "layers.16.attn.q_norm.weight": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wkv.biases": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wkv.scales": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wkv.weight": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.0.biases": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.0.scales": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.0.weight": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.1.biases": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.1.scales": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.1.weight": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.2.biases": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.2.scales": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.2.weight": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.3.biases": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.3.scales": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.3.weight": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.4.biases": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.4.scales": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.4.weight": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.5.biases": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.5.scales": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.5.weight": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.6.biases": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.6.scales": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.6.weight": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.7.biases": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.7.scales": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_a.7.weight": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_b.biases": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_b.scales": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wo_b.weight": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wq_a.biases": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wq_a.scales": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wq_a.weight": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wq_b.biases": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wq_b.scales": "model-00010-of-00026.safetensors", |
| "layers.16.attn.wq_b.weight": "model-00010-of-00026.safetensors", |
| "layers.16.attn_norm.weight": "model-00011-of-00026.safetensors", |
| "layers.16.ffn.experts.w1.biases": "model-00010-of-00026.safetensors", |
| "layers.16.ffn.experts.w1.scales": "model-00010-of-00026.safetensors", |
| "layers.16.ffn.experts.w1.weight": "model-00010-of-00026.safetensors", |
| "layers.16.ffn.experts.w2.biases": "model-00010-of-00026.safetensors", |
| "layers.16.ffn.experts.w2.scales": "model-00010-of-00026.safetensors", |
| "layers.16.ffn.experts.w2.weight": "model-00010-of-00026.safetensors", |
| "layers.16.ffn.experts.w3.biases": "model-00011-of-00026.safetensors", |
| "layers.16.ffn.experts.w3.scales": "model-00011-of-00026.safetensors", |
| "layers.16.ffn.experts.w3.weight": "model-00011-of-00026.safetensors", |
| "layers.16.ffn.gate.bias": "model-00010-of-00026.safetensors", |
| "layers.16.ffn.gate.weight": "model-00010-of-00026.safetensors", |
| "layers.16.ffn.shared_experts.w1.biases": "model-00011-of-00026.safetensors", |
| "layers.16.ffn.shared_experts.w1.scales": "model-00011-of-00026.safetensors", |
| "layers.16.ffn.shared_experts.w1.weight": "model-00011-of-00026.safetensors", |
| "layers.16.ffn.shared_experts.w2.biases": "model-00011-of-00026.safetensors", |
| "layers.16.ffn.shared_experts.w2.scales": "model-00011-of-00026.safetensors", |
| "layers.16.ffn.shared_experts.w2.weight": "model-00011-of-00026.safetensors", |
| "layers.16.ffn.shared_experts.w3.biases": "model-00011-of-00026.safetensors", |
| "layers.16.ffn.shared_experts.w3.scales": "model-00011-of-00026.safetensors", |
| "layers.16.ffn.shared_experts.w3.weight": "model-00011-of-00026.safetensors", |
| "layers.16.ffn_norm.weight": "model-00011-of-00026.safetensors", |
| "layers.16.hc_attn_base": "model-00011-of-00026.safetensors", |
| "layers.16.hc_attn_fn": "model-00011-of-00026.safetensors", |
| "layers.16.hc_attn_scale": "model-00011-of-00026.safetensors", |
| "layers.16.hc_ffn_base": "model-00011-of-00026.safetensors", |
| "layers.16.hc_ffn_fn": "model-00011-of-00026.safetensors", |
| "layers.16.hc_ffn_scale": "model-00011-of-00026.safetensors", |
| "layers.17.attn.attn_sink": "model-00011-of-00026.safetensors", |
| "layers.17.attn.compressor.ape": "model-00011-of-00026.safetensors", |
| "layers.17.attn.compressor.norm.weight": "model-00011-of-00026.safetensors", |
| "layers.17.attn.compressor.wgate.biases": "model-00011-of-00026.safetensors", |
| "layers.17.attn.compressor.wgate.scales": "model-00011-of-00026.safetensors", |
| "layers.17.attn.compressor.wgate.weight": "model-00011-of-00026.safetensors", |
| "layers.17.attn.compressor.wkv.biases": "model-00011-of-00026.safetensors", |
| "layers.17.attn.compressor.wkv.scales": "model-00011-of-00026.safetensors", |
| "layers.17.attn.compressor.wkv.weight": "model-00011-of-00026.safetensors", |
| "layers.17.attn.kv_norm.weight": "model-00011-of-00026.safetensors", |
| "layers.17.attn.q_norm.weight": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wkv.biases": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wkv.scales": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wkv.weight": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.0.biases": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.0.scales": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.0.weight": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.1.biases": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.1.scales": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.1.weight": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.2.biases": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.2.scales": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.2.weight": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.3.biases": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.3.scales": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.3.weight": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.4.biases": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.4.scales": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.4.weight": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.5.biases": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.5.scales": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.5.weight": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.6.biases": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.6.scales": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.6.weight": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.7.biases": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.7.scales": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_a.7.weight": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_b.biases": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_b.scales": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wo_b.weight": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wq_a.biases": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wq_a.scales": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wq_a.weight": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wq_b.biases": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wq_b.scales": "model-00011-of-00026.safetensors", |
| "layers.17.attn.wq_b.weight": "model-00011-of-00026.safetensors", |
| "layers.17.attn_norm.weight": "model-00011-of-00026.safetensors", |
| "layers.17.ffn.experts.w1.biases": "model-00011-of-00026.safetensors", |
| "layers.17.ffn.experts.w1.scales": "model-00011-of-00026.safetensors", |
| "layers.17.ffn.experts.w1.weight": "model-00011-of-00026.safetensors", |
| "layers.17.ffn.experts.w2.biases": "model-00011-of-00026.safetensors", |
| "layers.17.ffn.experts.w2.scales": "model-00011-of-00026.safetensors", |
| "layers.17.ffn.experts.w2.weight": "model-00011-of-00026.safetensors", |
| "layers.17.ffn.experts.w3.biases": "model-00011-of-00026.safetensors", |
| "layers.17.ffn.experts.w3.scales": "model-00011-of-00026.safetensors", |
| "layers.17.ffn.experts.w3.weight": "model-00011-of-00026.safetensors", |
| "layers.17.ffn.gate.bias": "model-00011-of-00026.safetensors", |
| "layers.17.ffn.gate.weight": "model-00011-of-00026.safetensors", |
| "layers.17.ffn.shared_experts.w1.biases": "model-00011-of-00026.safetensors", |
| "layers.17.ffn.shared_experts.w1.scales": "model-00011-of-00026.safetensors", |
| "layers.17.ffn.shared_experts.w1.weight": "model-00011-of-00026.safetensors", |
| "layers.17.ffn.shared_experts.w2.biases": "model-00011-of-00026.safetensors", |
| "layers.17.ffn.shared_experts.w2.scales": "model-00011-of-00026.safetensors", |
| "layers.17.ffn.shared_experts.w2.weight": "model-00011-of-00026.safetensors", |
| "layers.17.ffn.shared_experts.w3.biases": "model-00011-of-00026.safetensors", |
| "layers.17.ffn.shared_experts.w3.scales": "model-00011-of-00026.safetensors", |
| "layers.17.ffn.shared_experts.w3.weight": "model-00011-of-00026.safetensors", |
| "layers.17.ffn_norm.weight": "model-00011-of-00026.safetensors", |
| "layers.17.hc_attn_base": "model-00011-of-00026.safetensors", |
| "layers.17.hc_attn_fn": "model-00011-of-00026.safetensors", |
| "layers.17.hc_attn_scale": "model-00011-of-00026.safetensors", |
| "layers.17.hc_ffn_base": "model-00011-of-00026.safetensors", |
| "layers.17.hc_ffn_fn": "model-00011-of-00026.safetensors", |
| "layers.17.hc_ffn_scale": "model-00011-of-00026.safetensors", |
| "layers.18.attn.attn_sink": "model-00011-of-00026.safetensors", |
| "layers.18.attn.compressor.ape": "model-00011-of-00026.safetensors", |
| "layers.18.attn.compressor.norm.weight": "model-00011-of-00026.safetensors", |
| "layers.18.attn.compressor.wgate.biases": "model-00011-of-00026.safetensors", |
| "layers.18.attn.compressor.wgate.scales": "model-00011-of-00026.safetensors", |
| "layers.18.attn.compressor.wgate.weight": "model-00011-of-00026.safetensors", |
| "layers.18.attn.compressor.wkv.biases": "model-00011-of-00026.safetensors", |
| "layers.18.attn.compressor.wkv.scales": "model-00011-of-00026.safetensors", |
| "layers.18.attn.compressor.wkv.weight": "model-00011-of-00026.safetensors", |
| "layers.18.attn.indexer.compressor.ape": "model-00011-of-00026.safetensors", |
| "layers.18.attn.indexer.compressor.norm.weight": "model-00011-of-00026.safetensors", |
| "layers.18.attn.indexer.compressor.wgate.biases": "model-00011-of-00026.safetensors", |
| "layers.18.attn.indexer.compressor.wgate.scales": "model-00011-of-00026.safetensors", |
| "layers.18.attn.indexer.compressor.wgate.weight": "model-00011-of-00026.safetensors", |
| "layers.18.attn.indexer.compressor.wkv.biases": "model-00011-of-00026.safetensors", |
| "layers.18.attn.indexer.compressor.wkv.scales": "model-00011-of-00026.safetensors", |
| "layers.18.attn.indexer.compressor.wkv.weight": "model-00011-of-00026.safetensors", |
| "layers.18.attn.indexer.weights_proj.biases": "model-00011-of-00026.safetensors", |
| "layers.18.attn.indexer.weights_proj.scales": "model-00011-of-00026.safetensors", |
| "layers.18.attn.indexer.weights_proj.weight": "model-00011-of-00026.safetensors", |
| "layers.18.attn.indexer.wq_b.biases": "model-00011-of-00026.safetensors", |
| "layers.18.attn.indexer.wq_b.scales": "model-00011-of-00026.safetensors", |
| "layers.18.attn.indexer.wq_b.weight": "model-00011-of-00026.safetensors", |
| "layers.18.attn.kv_norm.weight": "model-00011-of-00026.safetensors", |
| "layers.18.attn.q_norm.weight": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wkv.biases": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wkv.scales": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wkv.weight": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.0.biases": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.0.scales": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.0.weight": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.1.biases": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.1.scales": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.1.weight": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.2.biases": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.2.scales": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.2.weight": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.3.biases": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.3.scales": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.3.weight": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.4.biases": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.4.scales": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.4.weight": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.5.biases": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.5.scales": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.5.weight": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.6.biases": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.6.scales": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.6.weight": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.7.biases": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.7.scales": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_a.7.weight": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_b.biases": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_b.scales": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wo_b.weight": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wq_a.biases": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wq_a.scales": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wq_a.weight": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wq_b.biases": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wq_b.scales": "model-00011-of-00026.safetensors", |
| "layers.18.attn.wq_b.weight": "model-00011-of-00026.safetensors", |
| "layers.18.attn_norm.weight": "model-00012-of-00026.safetensors", |
| "layers.18.ffn.experts.w1.biases": "model-00011-of-00026.safetensors", |
| "layers.18.ffn.experts.w1.scales": "model-00011-of-00026.safetensors", |
| "layers.18.ffn.experts.w1.weight": "model-00011-of-00026.safetensors", |
| "layers.18.ffn.experts.w2.biases": "model-00012-of-00026.safetensors", |
| "layers.18.ffn.experts.w2.scales": "model-00012-of-00026.safetensors", |
| "layers.18.ffn.experts.w2.weight": "model-00012-of-00026.safetensors", |
| "layers.18.ffn.experts.w3.biases": "model-00012-of-00026.safetensors", |
| "layers.18.ffn.experts.w3.scales": "model-00012-of-00026.safetensors", |
| "layers.18.ffn.experts.w3.weight": "model-00012-of-00026.safetensors", |
| "layers.18.ffn.gate.bias": "model-00011-of-00026.safetensors", |
| "layers.18.ffn.gate.weight": "model-00011-of-00026.safetensors", |
| "layers.18.ffn.shared_experts.w1.biases": "model-00012-of-00026.safetensors", |
| "layers.18.ffn.shared_experts.w1.scales": "model-00012-of-00026.safetensors", |
| "layers.18.ffn.shared_experts.w1.weight": "model-00012-of-00026.safetensors", |
| "layers.18.ffn.shared_experts.w2.biases": "model-00012-of-00026.safetensors", |
| "layers.18.ffn.shared_experts.w2.scales": "model-00012-of-00026.safetensors", |
| "layers.18.ffn.shared_experts.w2.weight": "model-00012-of-00026.safetensors", |
| "layers.18.ffn.shared_experts.w3.biases": "model-00012-of-00026.safetensors", |
| "layers.18.ffn.shared_experts.w3.scales": "model-00012-of-00026.safetensors", |
| "layers.18.ffn.shared_experts.w3.weight": "model-00012-of-00026.safetensors", |
| "layers.18.ffn_norm.weight": "model-00012-of-00026.safetensors", |
| "layers.18.hc_attn_base": "model-00012-of-00026.safetensors", |
| "layers.18.hc_attn_fn": "model-00012-of-00026.safetensors", |
| "layers.18.hc_attn_scale": "model-00012-of-00026.safetensors", |
| "layers.18.hc_ffn_base": "model-00012-of-00026.safetensors", |
| "layers.18.hc_ffn_fn": "model-00012-of-00026.safetensors", |
| "layers.18.hc_ffn_scale": "model-00012-of-00026.safetensors", |
| "layers.19.attn.attn_sink": "model-00012-of-00026.safetensors", |
| "layers.19.attn.compressor.ape": "model-00012-of-00026.safetensors", |
| "layers.19.attn.compressor.norm.weight": "model-00012-of-00026.safetensors", |
| "layers.19.attn.compressor.wgate.biases": "model-00012-of-00026.safetensors", |
| "layers.19.attn.compressor.wgate.scales": "model-00012-of-00026.safetensors", |
| "layers.19.attn.compressor.wgate.weight": "model-00012-of-00026.safetensors", |
| "layers.19.attn.compressor.wkv.biases": "model-00012-of-00026.safetensors", |
| "layers.19.attn.compressor.wkv.scales": "model-00012-of-00026.safetensors", |
| "layers.19.attn.compressor.wkv.weight": "model-00012-of-00026.safetensors", |
| "layers.19.attn.kv_norm.weight": "model-00012-of-00026.safetensors", |
| "layers.19.attn.q_norm.weight": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wkv.biases": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wkv.scales": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wkv.weight": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.0.biases": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.0.scales": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.0.weight": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.1.biases": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.1.scales": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.1.weight": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.2.biases": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.2.scales": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.2.weight": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.3.biases": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.3.scales": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.3.weight": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.4.biases": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.4.scales": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.4.weight": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.5.biases": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.5.scales": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.5.weight": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.6.biases": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.6.scales": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.6.weight": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.7.biases": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.7.scales": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_a.7.weight": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_b.biases": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_b.scales": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wo_b.weight": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wq_a.biases": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wq_a.scales": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wq_a.weight": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wq_b.biases": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wq_b.scales": "model-00012-of-00026.safetensors", |
| "layers.19.attn.wq_b.weight": "model-00012-of-00026.safetensors", |
| "layers.19.attn_norm.weight": "model-00012-of-00026.safetensors", |
| "layers.19.ffn.experts.w1.biases": "model-00012-of-00026.safetensors", |
| "layers.19.ffn.experts.w1.scales": "model-00012-of-00026.safetensors", |
| "layers.19.ffn.experts.w1.weight": "model-00012-of-00026.safetensors", |
| "layers.19.ffn.experts.w2.biases": "model-00012-of-00026.safetensors", |
| "layers.19.ffn.experts.w2.scales": "model-00012-of-00026.safetensors", |
| "layers.19.ffn.experts.w2.weight": "model-00012-of-00026.safetensors", |
| "layers.19.ffn.experts.w3.biases": "model-00012-of-00026.safetensors", |
| "layers.19.ffn.experts.w3.scales": "model-00012-of-00026.safetensors", |
| "layers.19.ffn.experts.w3.weight": "model-00012-of-00026.safetensors", |
| "layers.19.ffn.gate.bias": "model-00012-of-00026.safetensors", |
| "layers.19.ffn.gate.weight": "model-00012-of-00026.safetensors", |
| "layers.19.ffn.shared_experts.w1.biases": "model-00012-of-00026.safetensors", |
| "layers.19.ffn.shared_experts.w1.scales": "model-00012-of-00026.safetensors", |
| "layers.19.ffn.shared_experts.w1.weight": "model-00012-of-00026.safetensors", |
| "layers.19.ffn.shared_experts.w2.biases": "model-00012-of-00026.safetensors", |
| "layers.19.ffn.shared_experts.w2.scales": "model-00012-of-00026.safetensors", |
| "layers.19.ffn.shared_experts.w2.weight": "model-00012-of-00026.safetensors", |
| "layers.19.ffn.shared_experts.w3.biases": "model-00012-of-00026.safetensors", |
| "layers.19.ffn.shared_experts.w3.scales": "model-00012-of-00026.safetensors", |
| "layers.19.ffn.shared_experts.w3.weight": "model-00012-of-00026.safetensors", |
| "layers.19.ffn_norm.weight": "model-00012-of-00026.safetensors", |
| "layers.19.hc_attn_base": "model-00012-of-00026.safetensors", |
| "layers.19.hc_attn_fn": "model-00012-of-00026.safetensors", |
| "layers.19.hc_attn_scale": "model-00012-of-00026.safetensors", |
| "layers.19.hc_ffn_base": "model-00012-of-00026.safetensors", |
| "layers.19.hc_ffn_fn": "model-00012-of-00026.safetensors", |
| "layers.19.hc_ffn_scale": "model-00012-of-00026.safetensors", |
| "layers.2.attn.attn_sink": "model-00002-of-00026.safetensors", |
| "layers.2.attn.compressor.ape": "model-00002-of-00026.safetensors", |
| "layers.2.attn.compressor.norm.weight": "model-00002-of-00026.safetensors", |
| "layers.2.attn.compressor.wgate.biases": "model-00002-of-00026.safetensors", |
| "layers.2.attn.compressor.wgate.scales": "model-00002-of-00026.safetensors", |
| "layers.2.attn.compressor.wgate.weight": "model-00002-of-00026.safetensors", |
| "layers.2.attn.compressor.wkv.biases": "model-00002-of-00026.safetensors", |
| "layers.2.attn.compressor.wkv.scales": "model-00002-of-00026.safetensors", |
| "layers.2.attn.compressor.wkv.weight": "model-00002-of-00026.safetensors", |
| "layers.2.attn.indexer.compressor.ape": "model-00002-of-00026.safetensors", |
| "layers.2.attn.indexer.compressor.norm.weight": "model-00002-of-00026.safetensors", |
| "layers.2.attn.indexer.compressor.wgate.biases": "model-00002-of-00026.safetensors", |
| "layers.2.attn.indexer.compressor.wgate.scales": "model-00002-of-00026.safetensors", |
| "layers.2.attn.indexer.compressor.wgate.weight": "model-00002-of-00026.safetensors", |
| "layers.2.attn.indexer.compressor.wkv.biases": "model-00002-of-00026.safetensors", |
| "layers.2.attn.indexer.compressor.wkv.scales": "model-00002-of-00026.safetensors", |
| "layers.2.attn.indexer.compressor.wkv.weight": "model-00002-of-00026.safetensors", |
| "layers.2.attn.indexer.weights_proj.biases": "model-00002-of-00026.safetensors", |
| "layers.2.attn.indexer.weights_proj.scales": "model-00002-of-00026.safetensors", |
| "layers.2.attn.indexer.weights_proj.weight": "model-00002-of-00026.safetensors", |
| "layers.2.attn.indexer.wq_b.biases": "model-00002-of-00026.safetensors", |
| "layers.2.attn.indexer.wq_b.scales": "model-00002-of-00026.safetensors", |
| "layers.2.attn.indexer.wq_b.weight": "model-00002-of-00026.safetensors", |
| "layers.2.attn.kv_norm.weight": "model-00002-of-00026.safetensors", |
| "layers.2.attn.q_norm.weight": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wkv.biases": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wkv.scales": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wkv.weight": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.0.biases": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.0.scales": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.0.weight": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.1.biases": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.1.scales": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.1.weight": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.2.biases": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.2.scales": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.2.weight": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.3.biases": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.3.scales": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.3.weight": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.4.biases": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.4.scales": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.4.weight": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.5.biases": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.5.scales": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.5.weight": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.6.biases": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.6.scales": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.6.weight": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.7.biases": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.7.scales": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_a.7.weight": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_b.biases": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_b.scales": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wo_b.weight": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wq_a.biases": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wq_a.scales": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wq_a.weight": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wq_b.biases": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wq_b.scales": "model-00002-of-00026.safetensors", |
| "layers.2.attn.wq_b.weight": "model-00002-of-00026.safetensors", |
| "layers.2.attn_norm.weight": "model-00002-of-00026.safetensors", |
| "layers.2.ffn.experts.w1.biases": "model-00002-of-00026.safetensors", |
| "layers.2.ffn.experts.w1.scales": "model-00002-of-00026.safetensors", |
| "layers.2.ffn.experts.w1.weight": "model-00002-of-00026.safetensors", |
| "layers.2.ffn.experts.w2.biases": "model-00002-of-00026.safetensors", |
| "layers.2.ffn.experts.w2.scales": "model-00002-of-00026.safetensors", |
| "layers.2.ffn.experts.w2.weight": "model-00002-of-00026.safetensors", |
| "layers.2.ffn.experts.w3.biases": "model-00002-of-00026.safetensors", |
| "layers.2.ffn.experts.w3.scales": "model-00002-of-00026.safetensors", |
| "layers.2.ffn.experts.w3.weight": "model-00002-of-00026.safetensors", |
| "layers.2.ffn.gate.tid2eid": "model-00002-of-00026.safetensors", |
| "layers.2.ffn.gate.weight": "model-00002-of-00026.safetensors", |
| "layers.2.ffn.shared_experts.w1.biases": "model-00002-of-00026.safetensors", |
| "layers.2.ffn.shared_experts.w1.scales": "model-00002-of-00026.safetensors", |
| "layers.2.ffn.shared_experts.w1.weight": "model-00002-of-00026.safetensors", |
| "layers.2.ffn.shared_experts.w2.biases": "model-00002-of-00026.safetensors", |
| "layers.2.ffn.shared_experts.w2.scales": "model-00002-of-00026.safetensors", |
| "layers.2.ffn.shared_experts.w2.weight": "model-00002-of-00026.safetensors", |
| "layers.2.ffn.shared_experts.w3.biases": "model-00002-of-00026.safetensors", |
| "layers.2.ffn.shared_experts.w3.scales": "model-00002-of-00026.safetensors", |
| "layers.2.ffn.shared_experts.w3.weight": "model-00002-of-00026.safetensors", |
| "layers.2.ffn_norm.weight": "model-00002-of-00026.safetensors", |
| "layers.2.hc_attn_base": "model-00002-of-00026.safetensors", |
| "layers.2.hc_attn_fn": "model-00002-of-00026.safetensors", |
| "layers.2.hc_attn_scale": "model-00002-of-00026.safetensors", |
| "layers.2.hc_ffn_base": "model-00002-of-00026.safetensors", |
| "layers.2.hc_ffn_fn": "model-00002-of-00026.safetensors", |
| "layers.2.hc_ffn_scale": "model-00002-of-00026.safetensors", |
| "layers.20.attn.attn_sink": "model-00012-of-00026.safetensors", |
| "layers.20.attn.compressor.ape": "model-00012-of-00026.safetensors", |
| "layers.20.attn.compressor.norm.weight": "model-00012-of-00026.safetensors", |
| "layers.20.attn.compressor.wgate.biases": "model-00012-of-00026.safetensors", |
| "layers.20.attn.compressor.wgate.scales": "model-00012-of-00026.safetensors", |
| "layers.20.attn.compressor.wgate.weight": "model-00012-of-00026.safetensors", |
| "layers.20.attn.compressor.wkv.biases": "model-00012-of-00026.safetensors", |
| "layers.20.attn.compressor.wkv.scales": "model-00012-of-00026.safetensors", |
| "layers.20.attn.compressor.wkv.weight": "model-00012-of-00026.safetensors", |
| "layers.20.attn.indexer.compressor.ape": "model-00012-of-00026.safetensors", |
| "layers.20.attn.indexer.compressor.norm.weight": "model-00012-of-00026.safetensors", |
| "layers.20.attn.indexer.compressor.wgate.biases": "model-00012-of-00026.safetensors", |
| "layers.20.attn.indexer.compressor.wgate.scales": "model-00012-of-00026.safetensors", |
| "layers.20.attn.indexer.compressor.wgate.weight": "model-00012-of-00026.safetensors", |
| "layers.20.attn.indexer.compressor.wkv.biases": "model-00012-of-00026.safetensors", |
| "layers.20.attn.indexer.compressor.wkv.scales": "model-00012-of-00026.safetensors", |
| "layers.20.attn.indexer.compressor.wkv.weight": "model-00012-of-00026.safetensors", |
| "layers.20.attn.indexer.weights_proj.biases": "model-00012-of-00026.safetensors", |
| "layers.20.attn.indexer.weights_proj.scales": "model-00012-of-00026.safetensors", |
| "layers.20.attn.indexer.weights_proj.weight": "model-00012-of-00026.safetensors", |
| "layers.20.attn.indexer.wq_b.biases": "model-00012-of-00026.safetensors", |
| "layers.20.attn.indexer.wq_b.scales": "model-00012-of-00026.safetensors", |
| "layers.20.attn.indexer.wq_b.weight": "model-00012-of-00026.safetensors", |
| "layers.20.attn.kv_norm.weight": "model-00012-of-00026.safetensors", |
| "layers.20.attn.q_norm.weight": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wkv.biases": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wkv.scales": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wkv.weight": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.0.biases": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.0.scales": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.0.weight": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.1.biases": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.1.scales": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.1.weight": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.2.biases": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.2.scales": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.2.weight": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.3.biases": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.3.scales": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.3.weight": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.4.biases": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.4.scales": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.4.weight": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.5.biases": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.5.scales": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.5.weight": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.6.biases": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.6.scales": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.6.weight": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.7.biases": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.7.scales": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_a.7.weight": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_b.biases": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_b.scales": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wo_b.weight": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wq_a.biases": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wq_a.scales": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wq_a.weight": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wq_b.biases": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wq_b.scales": "model-00012-of-00026.safetensors", |
| "layers.20.attn.wq_b.weight": "model-00012-of-00026.safetensors", |
| "layers.20.attn_norm.weight": "model-00013-of-00026.safetensors", |
| "layers.20.ffn.experts.w1.biases": "model-00013-of-00026.safetensors", |
| "layers.20.ffn.experts.w1.scales": "model-00013-of-00026.safetensors", |
| "layers.20.ffn.experts.w1.weight": "model-00013-of-00026.safetensors", |
| "layers.20.ffn.experts.w2.biases": "model-00013-of-00026.safetensors", |
| "layers.20.ffn.experts.w2.scales": "model-00013-of-00026.safetensors", |
| "layers.20.ffn.experts.w2.weight": "model-00013-of-00026.safetensors", |
| "layers.20.ffn.experts.w3.biases": "model-00013-of-00026.safetensors", |
| "layers.20.ffn.experts.w3.scales": "model-00013-of-00026.safetensors", |
| "layers.20.ffn.experts.w3.weight": "model-00013-of-00026.safetensors", |
| "layers.20.ffn.gate.bias": "model-00012-of-00026.safetensors", |
| "layers.20.ffn.gate.weight": "model-00012-of-00026.safetensors", |
| "layers.20.ffn.shared_experts.w1.biases": "model-00013-of-00026.safetensors", |
| "layers.20.ffn.shared_experts.w1.scales": "model-00013-of-00026.safetensors", |
| "layers.20.ffn.shared_experts.w1.weight": "model-00013-of-00026.safetensors", |
| "layers.20.ffn.shared_experts.w2.biases": "model-00013-of-00026.safetensors", |
| "layers.20.ffn.shared_experts.w2.scales": "model-00013-of-00026.safetensors", |
| "layers.20.ffn.shared_experts.w2.weight": "model-00013-of-00026.safetensors", |
| "layers.20.ffn.shared_experts.w3.biases": "model-00013-of-00026.safetensors", |
| "layers.20.ffn.shared_experts.w3.scales": "model-00013-of-00026.safetensors", |
| "layers.20.ffn.shared_experts.w3.weight": "model-00013-of-00026.safetensors", |
| "layers.20.ffn_norm.weight": "model-00013-of-00026.safetensors", |
| "layers.20.hc_attn_base": "model-00013-of-00026.safetensors", |
| "layers.20.hc_attn_fn": "model-00013-of-00026.safetensors", |
| "layers.20.hc_attn_scale": "model-00013-of-00026.safetensors", |
| "layers.20.hc_ffn_base": "model-00013-of-00026.safetensors", |
| "layers.20.hc_ffn_fn": "model-00013-of-00026.safetensors", |
| "layers.20.hc_ffn_scale": "model-00013-of-00026.safetensors", |
| "layers.21.attn.attn_sink": "model-00013-of-00026.safetensors", |
| "layers.21.attn.compressor.ape": "model-00013-of-00026.safetensors", |
| "layers.21.attn.compressor.norm.weight": "model-00013-of-00026.safetensors", |
| "layers.21.attn.compressor.wgate.biases": "model-00013-of-00026.safetensors", |
| "layers.21.attn.compressor.wgate.scales": "model-00013-of-00026.safetensors", |
| "layers.21.attn.compressor.wgate.weight": "model-00013-of-00026.safetensors", |
| "layers.21.attn.compressor.wkv.biases": "model-00013-of-00026.safetensors", |
| "layers.21.attn.compressor.wkv.scales": "model-00013-of-00026.safetensors", |
| "layers.21.attn.compressor.wkv.weight": "model-00013-of-00026.safetensors", |
| "layers.21.attn.kv_norm.weight": "model-00013-of-00026.safetensors", |
| "layers.21.attn.q_norm.weight": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wkv.biases": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wkv.scales": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wkv.weight": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.0.biases": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.0.scales": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.0.weight": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.1.biases": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.1.scales": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.1.weight": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.2.biases": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.2.scales": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.2.weight": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.3.biases": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.3.scales": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.3.weight": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.4.biases": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.4.scales": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.4.weight": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.5.biases": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.5.scales": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.5.weight": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.6.biases": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.6.scales": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.6.weight": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.7.biases": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.7.scales": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_a.7.weight": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_b.biases": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_b.scales": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wo_b.weight": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wq_a.biases": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wq_a.scales": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wq_a.weight": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wq_b.biases": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wq_b.scales": "model-00013-of-00026.safetensors", |
| "layers.21.attn.wq_b.weight": "model-00013-of-00026.safetensors", |
| "layers.21.attn_norm.weight": "model-00014-of-00026.safetensors", |
| "layers.21.ffn.experts.w1.biases": "model-00013-of-00026.safetensors", |
| "layers.21.ffn.experts.w1.scales": "model-00013-of-00026.safetensors", |
| "layers.21.ffn.experts.w1.weight": "model-00013-of-00026.safetensors", |
| "layers.21.ffn.experts.w2.biases": "model-00013-of-00026.safetensors", |
| "layers.21.ffn.experts.w2.scales": "model-00013-of-00026.safetensors", |
| "layers.21.ffn.experts.w2.weight": "model-00013-of-00026.safetensors", |
| "layers.21.ffn.experts.w3.biases": "model-00014-of-00026.safetensors", |
| "layers.21.ffn.experts.w3.scales": "model-00014-of-00026.safetensors", |
| "layers.21.ffn.experts.w3.weight": "model-00014-of-00026.safetensors", |
| "layers.21.ffn.gate.bias": "model-00013-of-00026.safetensors", |
| "layers.21.ffn.gate.weight": "model-00013-of-00026.safetensors", |
| "layers.21.ffn.shared_experts.w1.biases": "model-00014-of-00026.safetensors", |
| "layers.21.ffn.shared_experts.w1.scales": "model-00014-of-00026.safetensors", |
| "layers.21.ffn.shared_experts.w1.weight": "model-00014-of-00026.safetensors", |
| "layers.21.ffn.shared_experts.w2.biases": "model-00014-of-00026.safetensors", |
| "layers.21.ffn.shared_experts.w2.scales": "model-00014-of-00026.safetensors", |
| "layers.21.ffn.shared_experts.w2.weight": "model-00014-of-00026.safetensors", |
| "layers.21.ffn.shared_experts.w3.biases": "model-00014-of-00026.safetensors", |
| "layers.21.ffn.shared_experts.w3.scales": "model-00014-of-00026.safetensors", |
| "layers.21.ffn.shared_experts.w3.weight": "model-00014-of-00026.safetensors", |
| "layers.21.ffn_norm.weight": "model-00014-of-00026.safetensors", |
| "layers.21.hc_attn_base": "model-00014-of-00026.safetensors", |
| "layers.21.hc_attn_fn": "model-00014-of-00026.safetensors", |
| "layers.21.hc_attn_scale": "model-00014-of-00026.safetensors", |
| "layers.21.hc_ffn_base": "model-00014-of-00026.safetensors", |
| "layers.21.hc_ffn_fn": "model-00014-of-00026.safetensors", |
| "layers.21.hc_ffn_scale": "model-00014-of-00026.safetensors", |
| "layers.22.attn.attn_sink": "model-00014-of-00026.safetensors", |
| "layers.22.attn.compressor.ape": "model-00014-of-00026.safetensors", |
| "layers.22.attn.compressor.norm.weight": "model-00014-of-00026.safetensors", |
| "layers.22.attn.compressor.wgate.biases": "model-00014-of-00026.safetensors", |
| "layers.22.attn.compressor.wgate.scales": "model-00014-of-00026.safetensors", |
| "layers.22.attn.compressor.wgate.weight": "model-00014-of-00026.safetensors", |
| "layers.22.attn.compressor.wkv.biases": "model-00014-of-00026.safetensors", |
| "layers.22.attn.compressor.wkv.scales": "model-00014-of-00026.safetensors", |
| "layers.22.attn.compressor.wkv.weight": "model-00014-of-00026.safetensors", |
| "layers.22.attn.indexer.compressor.ape": "model-00014-of-00026.safetensors", |
| "layers.22.attn.indexer.compressor.norm.weight": "model-00014-of-00026.safetensors", |
| "layers.22.attn.indexer.compressor.wgate.biases": "model-00014-of-00026.safetensors", |
| "layers.22.attn.indexer.compressor.wgate.scales": "model-00014-of-00026.safetensors", |
| "layers.22.attn.indexer.compressor.wgate.weight": "model-00014-of-00026.safetensors", |
| "layers.22.attn.indexer.compressor.wkv.biases": "model-00014-of-00026.safetensors", |
| "layers.22.attn.indexer.compressor.wkv.scales": "model-00014-of-00026.safetensors", |
| "layers.22.attn.indexer.compressor.wkv.weight": "model-00014-of-00026.safetensors", |
| "layers.22.attn.indexer.weights_proj.biases": "model-00014-of-00026.safetensors", |
| "layers.22.attn.indexer.weights_proj.scales": "model-00014-of-00026.safetensors", |
| "layers.22.attn.indexer.weights_proj.weight": "model-00014-of-00026.safetensors", |
| "layers.22.attn.indexer.wq_b.biases": "model-00014-of-00026.safetensors", |
| "layers.22.attn.indexer.wq_b.scales": "model-00014-of-00026.safetensors", |
| "layers.22.attn.indexer.wq_b.weight": "model-00014-of-00026.safetensors", |
| "layers.22.attn.kv_norm.weight": "model-00014-of-00026.safetensors", |
| "layers.22.attn.q_norm.weight": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wkv.biases": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wkv.scales": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wkv.weight": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.0.biases": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.0.scales": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.0.weight": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.1.biases": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.1.scales": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.1.weight": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.2.biases": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.2.scales": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.2.weight": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.3.biases": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.3.scales": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.3.weight": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.4.biases": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.4.scales": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.4.weight": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.5.biases": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.5.scales": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.5.weight": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.6.biases": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.6.scales": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.6.weight": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.7.biases": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.7.scales": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_a.7.weight": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_b.biases": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_b.scales": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wo_b.weight": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wq_a.biases": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wq_a.scales": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wq_a.weight": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wq_b.biases": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wq_b.scales": "model-00014-of-00026.safetensors", |
| "layers.22.attn.wq_b.weight": "model-00014-of-00026.safetensors", |
| "layers.22.attn_norm.weight": "model-00014-of-00026.safetensors", |
| "layers.22.ffn.experts.w1.biases": "model-00014-of-00026.safetensors", |
| "layers.22.ffn.experts.w1.scales": "model-00014-of-00026.safetensors", |
| "layers.22.ffn.experts.w1.weight": "model-00014-of-00026.safetensors", |
| "layers.22.ffn.experts.w2.biases": "model-00014-of-00026.safetensors", |
| "layers.22.ffn.experts.w2.scales": "model-00014-of-00026.safetensors", |
| "layers.22.ffn.experts.w2.weight": "model-00014-of-00026.safetensors", |
| "layers.22.ffn.experts.w3.biases": "model-00014-of-00026.safetensors", |
| "layers.22.ffn.experts.w3.scales": "model-00014-of-00026.safetensors", |
| "layers.22.ffn.experts.w3.weight": "model-00014-of-00026.safetensors", |
| "layers.22.ffn.gate.bias": "model-00014-of-00026.safetensors", |
| "layers.22.ffn.gate.weight": "model-00014-of-00026.safetensors", |
| "layers.22.ffn.shared_experts.w1.biases": "model-00014-of-00026.safetensors", |
| "layers.22.ffn.shared_experts.w1.scales": "model-00014-of-00026.safetensors", |
| "layers.22.ffn.shared_experts.w1.weight": "model-00014-of-00026.safetensors", |
| "layers.22.ffn.shared_experts.w2.biases": "model-00014-of-00026.safetensors", |
| "layers.22.ffn.shared_experts.w2.scales": "model-00014-of-00026.safetensors", |
| "layers.22.ffn.shared_experts.w2.weight": "model-00014-of-00026.safetensors", |
| "layers.22.ffn.shared_experts.w3.biases": "model-00014-of-00026.safetensors", |
| "layers.22.ffn.shared_experts.w3.scales": "model-00014-of-00026.safetensors", |
| "layers.22.ffn.shared_experts.w3.weight": "model-00014-of-00026.safetensors", |
| "layers.22.ffn_norm.weight": "model-00014-of-00026.safetensors", |
| "layers.22.hc_attn_base": "model-00014-of-00026.safetensors", |
| "layers.22.hc_attn_fn": "model-00014-of-00026.safetensors", |
| "layers.22.hc_attn_scale": "model-00014-of-00026.safetensors", |
| "layers.22.hc_ffn_base": "model-00014-of-00026.safetensors", |
| "layers.22.hc_ffn_fn": "model-00014-of-00026.safetensors", |
| "layers.22.hc_ffn_scale": "model-00014-of-00026.safetensors", |
| "layers.23.attn.attn_sink": "model-00014-of-00026.safetensors", |
| "layers.23.attn.compressor.ape": "model-00014-of-00026.safetensors", |
| "layers.23.attn.compressor.norm.weight": "model-00014-of-00026.safetensors", |
| "layers.23.attn.compressor.wgate.biases": "model-00014-of-00026.safetensors", |
| "layers.23.attn.compressor.wgate.scales": "model-00014-of-00026.safetensors", |
| "layers.23.attn.compressor.wgate.weight": "model-00014-of-00026.safetensors", |
| "layers.23.attn.compressor.wkv.biases": "model-00014-of-00026.safetensors", |
| "layers.23.attn.compressor.wkv.scales": "model-00014-of-00026.safetensors", |
| "layers.23.attn.compressor.wkv.weight": "model-00014-of-00026.safetensors", |
| "layers.23.attn.kv_norm.weight": "model-00014-of-00026.safetensors", |
| "layers.23.attn.q_norm.weight": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wkv.biases": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wkv.scales": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wkv.weight": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.0.biases": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.0.scales": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.0.weight": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.1.biases": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.1.scales": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.1.weight": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.2.biases": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.2.scales": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.2.weight": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.3.biases": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.3.scales": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.3.weight": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.4.biases": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.4.scales": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.4.weight": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.5.biases": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.5.scales": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.5.weight": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.6.biases": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.6.scales": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.6.weight": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.7.biases": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.7.scales": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_a.7.weight": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_b.biases": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_b.scales": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wo_b.weight": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wq_a.biases": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wq_a.scales": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wq_a.weight": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wq_b.biases": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wq_b.scales": "model-00014-of-00026.safetensors", |
| "layers.23.attn.wq_b.weight": "model-00014-of-00026.safetensors", |
| "layers.23.attn_norm.weight": "model-00015-of-00026.safetensors", |
| "layers.23.ffn.experts.w1.biases": "model-00014-of-00026.safetensors", |
| "layers.23.ffn.experts.w1.scales": "model-00014-of-00026.safetensors", |
| "layers.23.ffn.experts.w1.weight": "model-00014-of-00026.safetensors", |
| "layers.23.ffn.experts.w2.biases": "model-00015-of-00026.safetensors", |
| "layers.23.ffn.experts.w2.scales": "model-00015-of-00026.safetensors", |
| "layers.23.ffn.experts.w2.weight": "model-00015-of-00026.safetensors", |
| "layers.23.ffn.experts.w3.biases": "model-00015-of-00026.safetensors", |
| "layers.23.ffn.experts.w3.scales": "model-00015-of-00026.safetensors", |
| "layers.23.ffn.experts.w3.weight": "model-00015-of-00026.safetensors", |
| "layers.23.ffn.gate.bias": "model-00014-of-00026.safetensors", |
| "layers.23.ffn.gate.weight": "model-00014-of-00026.safetensors", |
| "layers.23.ffn.shared_experts.w1.biases": "model-00015-of-00026.safetensors", |
| "layers.23.ffn.shared_experts.w1.scales": "model-00015-of-00026.safetensors", |
| "layers.23.ffn.shared_experts.w1.weight": "model-00015-of-00026.safetensors", |
| "layers.23.ffn.shared_experts.w2.biases": "model-00015-of-00026.safetensors", |
| "layers.23.ffn.shared_experts.w2.scales": "model-00015-of-00026.safetensors", |
| "layers.23.ffn.shared_experts.w2.weight": "model-00015-of-00026.safetensors", |
| "layers.23.ffn.shared_experts.w3.biases": "model-00015-of-00026.safetensors", |
| "layers.23.ffn.shared_experts.w3.scales": "model-00015-of-00026.safetensors", |
| "layers.23.ffn.shared_experts.w3.weight": "model-00015-of-00026.safetensors", |
| "layers.23.ffn_norm.weight": "model-00015-of-00026.safetensors", |
| "layers.23.hc_attn_base": "model-00015-of-00026.safetensors", |
| "layers.23.hc_attn_fn": "model-00015-of-00026.safetensors", |
| "layers.23.hc_attn_scale": "model-00015-of-00026.safetensors", |
| "layers.23.hc_ffn_base": "model-00015-of-00026.safetensors", |
| "layers.23.hc_ffn_fn": "model-00015-of-00026.safetensors", |
| "layers.23.hc_ffn_scale": "model-00015-of-00026.safetensors", |
| "layers.24.attn.attn_sink": "model-00015-of-00026.safetensors", |
| "layers.24.attn.compressor.ape": "model-00015-of-00026.safetensors", |
| "layers.24.attn.compressor.norm.weight": "model-00015-of-00026.safetensors", |
| "layers.24.attn.compressor.wgate.biases": "model-00015-of-00026.safetensors", |
| "layers.24.attn.compressor.wgate.scales": "model-00015-of-00026.safetensors", |
| "layers.24.attn.compressor.wgate.weight": "model-00015-of-00026.safetensors", |
| "layers.24.attn.compressor.wkv.biases": "model-00015-of-00026.safetensors", |
| "layers.24.attn.compressor.wkv.scales": "model-00015-of-00026.safetensors", |
| "layers.24.attn.compressor.wkv.weight": "model-00015-of-00026.safetensors", |
| "layers.24.attn.indexer.compressor.ape": "model-00015-of-00026.safetensors", |
| "layers.24.attn.indexer.compressor.norm.weight": "model-00015-of-00026.safetensors", |
| "layers.24.attn.indexer.compressor.wgate.biases": "model-00015-of-00026.safetensors", |
| "layers.24.attn.indexer.compressor.wgate.scales": "model-00015-of-00026.safetensors", |
| "layers.24.attn.indexer.compressor.wgate.weight": "model-00015-of-00026.safetensors", |
| "layers.24.attn.indexer.compressor.wkv.biases": "model-00015-of-00026.safetensors", |
| "layers.24.attn.indexer.compressor.wkv.scales": "model-00015-of-00026.safetensors", |
| "layers.24.attn.indexer.compressor.wkv.weight": "model-00015-of-00026.safetensors", |
| "layers.24.attn.indexer.weights_proj.biases": "model-00015-of-00026.safetensors", |
| "layers.24.attn.indexer.weights_proj.scales": "model-00015-of-00026.safetensors", |
| "layers.24.attn.indexer.weights_proj.weight": "model-00015-of-00026.safetensors", |
| "layers.24.attn.indexer.wq_b.biases": "model-00015-of-00026.safetensors", |
| "layers.24.attn.indexer.wq_b.scales": "model-00015-of-00026.safetensors", |
| "layers.24.attn.indexer.wq_b.weight": "model-00015-of-00026.safetensors", |
| "layers.24.attn.kv_norm.weight": "model-00015-of-00026.safetensors", |
| "layers.24.attn.q_norm.weight": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wkv.biases": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wkv.scales": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wkv.weight": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.0.biases": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.0.scales": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.0.weight": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.1.biases": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.1.scales": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.1.weight": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.2.biases": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.2.scales": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.2.weight": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.3.biases": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.3.scales": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.3.weight": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.4.biases": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.4.scales": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.4.weight": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.5.biases": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.5.scales": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.5.weight": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.6.biases": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.6.scales": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.6.weight": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.7.biases": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.7.scales": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_a.7.weight": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_b.biases": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_b.scales": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wo_b.weight": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wq_a.biases": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wq_a.scales": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wq_a.weight": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wq_b.biases": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wq_b.scales": "model-00015-of-00026.safetensors", |
| "layers.24.attn.wq_b.weight": "model-00015-of-00026.safetensors", |
| "layers.24.attn_norm.weight": "model-00015-of-00026.safetensors", |
| "layers.24.ffn.experts.w1.biases": "model-00015-of-00026.safetensors", |
| "layers.24.ffn.experts.w1.scales": "model-00015-of-00026.safetensors", |
| "layers.24.ffn.experts.w1.weight": "model-00015-of-00026.safetensors", |
| "layers.24.ffn.experts.w2.biases": "model-00015-of-00026.safetensors", |
| "layers.24.ffn.experts.w2.scales": "model-00015-of-00026.safetensors", |
| "layers.24.ffn.experts.w2.weight": "model-00015-of-00026.safetensors", |
| "layers.24.ffn.experts.w3.biases": "model-00015-of-00026.safetensors", |
| "layers.24.ffn.experts.w3.scales": "model-00015-of-00026.safetensors", |
| "layers.24.ffn.experts.w3.weight": "model-00015-of-00026.safetensors", |
| "layers.24.ffn.gate.bias": "model-00015-of-00026.safetensors", |
| "layers.24.ffn.gate.weight": "model-00015-of-00026.safetensors", |
| "layers.24.ffn.shared_experts.w1.biases": "model-00015-of-00026.safetensors", |
| "layers.24.ffn.shared_experts.w1.scales": "model-00015-of-00026.safetensors", |
| "layers.24.ffn.shared_experts.w1.weight": "model-00015-of-00026.safetensors", |
| "layers.24.ffn.shared_experts.w2.biases": "model-00015-of-00026.safetensors", |
| "layers.24.ffn.shared_experts.w2.scales": "model-00015-of-00026.safetensors", |
| "layers.24.ffn.shared_experts.w2.weight": "model-00015-of-00026.safetensors", |
| "layers.24.ffn.shared_experts.w3.biases": "model-00015-of-00026.safetensors", |
| "layers.24.ffn.shared_experts.w3.scales": "model-00015-of-00026.safetensors", |
| "layers.24.ffn.shared_experts.w3.weight": "model-00015-of-00026.safetensors", |
| "layers.24.ffn_norm.weight": "model-00015-of-00026.safetensors", |
| "layers.24.hc_attn_base": "model-00015-of-00026.safetensors", |
| "layers.24.hc_attn_fn": "model-00015-of-00026.safetensors", |
| "layers.24.hc_attn_scale": "model-00015-of-00026.safetensors", |
| "layers.24.hc_ffn_base": "model-00015-of-00026.safetensors", |
| "layers.24.hc_ffn_fn": "model-00015-of-00026.safetensors", |
| "layers.24.hc_ffn_scale": "model-00015-of-00026.safetensors", |
| "layers.25.attn.attn_sink": "model-00015-of-00026.safetensors", |
| "layers.25.attn.compressor.ape": "model-00015-of-00026.safetensors", |
| "layers.25.attn.compressor.norm.weight": "model-00015-of-00026.safetensors", |
| "layers.25.attn.compressor.wgate.biases": "model-00015-of-00026.safetensors", |
| "layers.25.attn.compressor.wgate.scales": "model-00015-of-00026.safetensors", |
| "layers.25.attn.compressor.wgate.weight": "model-00015-of-00026.safetensors", |
| "layers.25.attn.compressor.wkv.biases": "model-00015-of-00026.safetensors", |
| "layers.25.attn.compressor.wkv.scales": "model-00015-of-00026.safetensors", |
| "layers.25.attn.compressor.wkv.weight": "model-00015-of-00026.safetensors", |
| "layers.25.attn.kv_norm.weight": "model-00015-of-00026.safetensors", |
| "layers.25.attn.q_norm.weight": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wkv.biases": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wkv.scales": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wkv.weight": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.0.biases": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.0.scales": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.0.weight": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.1.biases": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.1.scales": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.1.weight": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.2.biases": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.2.scales": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.2.weight": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.3.biases": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.3.scales": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.3.weight": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.4.biases": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.4.scales": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.4.weight": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.5.biases": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.5.scales": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.5.weight": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.6.biases": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.6.scales": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.6.weight": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.7.biases": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.7.scales": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_a.7.weight": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_b.biases": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_b.scales": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wo_b.weight": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wq_a.biases": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wq_a.scales": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wq_a.weight": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wq_b.biases": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wq_b.scales": "model-00015-of-00026.safetensors", |
| "layers.25.attn.wq_b.weight": "model-00015-of-00026.safetensors", |
| "layers.25.attn_norm.weight": "model-00016-of-00026.safetensors", |
| "layers.25.ffn.experts.w1.biases": "model-00016-of-00026.safetensors", |
| "layers.25.ffn.experts.w1.scales": "model-00016-of-00026.safetensors", |
| "layers.25.ffn.experts.w1.weight": "model-00016-of-00026.safetensors", |
| "layers.25.ffn.experts.w2.biases": "model-00016-of-00026.safetensors", |
| "layers.25.ffn.experts.w2.scales": "model-00016-of-00026.safetensors", |
| "layers.25.ffn.experts.w2.weight": "model-00016-of-00026.safetensors", |
| "layers.25.ffn.experts.w3.biases": "model-00016-of-00026.safetensors", |
| "layers.25.ffn.experts.w3.scales": "model-00016-of-00026.safetensors", |
| "layers.25.ffn.experts.w3.weight": "model-00016-of-00026.safetensors", |
| "layers.25.ffn.gate.bias": "model-00015-of-00026.safetensors", |
| "layers.25.ffn.gate.weight": "model-00015-of-00026.safetensors", |
| "layers.25.ffn.shared_experts.w1.biases": "model-00016-of-00026.safetensors", |
| "layers.25.ffn.shared_experts.w1.scales": "model-00016-of-00026.safetensors", |
| "layers.25.ffn.shared_experts.w1.weight": "model-00016-of-00026.safetensors", |
| "layers.25.ffn.shared_experts.w2.biases": "model-00016-of-00026.safetensors", |
| "layers.25.ffn.shared_experts.w2.scales": "model-00016-of-00026.safetensors", |
| "layers.25.ffn.shared_experts.w2.weight": "model-00016-of-00026.safetensors", |
| "layers.25.ffn.shared_experts.w3.biases": "model-00016-of-00026.safetensors", |
| "layers.25.ffn.shared_experts.w3.scales": "model-00016-of-00026.safetensors", |
| "layers.25.ffn.shared_experts.w3.weight": "model-00016-of-00026.safetensors", |
| "layers.25.ffn_norm.weight": "model-00016-of-00026.safetensors", |
| "layers.25.hc_attn_base": "model-00016-of-00026.safetensors", |
| "layers.25.hc_attn_fn": "model-00016-of-00026.safetensors", |
| "layers.25.hc_attn_scale": "model-00016-of-00026.safetensors", |
| "layers.25.hc_ffn_base": "model-00016-of-00026.safetensors", |
| "layers.25.hc_ffn_fn": "model-00016-of-00026.safetensors", |
| "layers.25.hc_ffn_scale": "model-00016-of-00026.safetensors", |
| "layers.26.attn.attn_sink": "model-00016-of-00026.safetensors", |
| "layers.26.attn.compressor.ape": "model-00016-of-00026.safetensors", |
| "layers.26.attn.compressor.norm.weight": "model-00016-of-00026.safetensors", |
| "layers.26.attn.compressor.wgate.biases": "model-00016-of-00026.safetensors", |
| "layers.26.attn.compressor.wgate.scales": "model-00016-of-00026.safetensors", |
| "layers.26.attn.compressor.wgate.weight": "model-00016-of-00026.safetensors", |
| "layers.26.attn.compressor.wkv.biases": "model-00016-of-00026.safetensors", |
| "layers.26.attn.compressor.wkv.scales": "model-00016-of-00026.safetensors", |
| "layers.26.attn.compressor.wkv.weight": "model-00016-of-00026.safetensors", |
| "layers.26.attn.indexer.compressor.ape": "model-00016-of-00026.safetensors", |
| "layers.26.attn.indexer.compressor.norm.weight": "model-00016-of-00026.safetensors", |
| "layers.26.attn.indexer.compressor.wgate.biases": "model-00016-of-00026.safetensors", |
| "layers.26.attn.indexer.compressor.wgate.scales": "model-00016-of-00026.safetensors", |
| "layers.26.attn.indexer.compressor.wgate.weight": "model-00016-of-00026.safetensors", |
| "layers.26.attn.indexer.compressor.wkv.biases": "model-00016-of-00026.safetensors", |
| "layers.26.attn.indexer.compressor.wkv.scales": "model-00016-of-00026.safetensors", |
| "layers.26.attn.indexer.compressor.wkv.weight": "model-00016-of-00026.safetensors", |
| "layers.26.attn.indexer.weights_proj.biases": "model-00016-of-00026.safetensors", |
| "layers.26.attn.indexer.weights_proj.scales": "model-00016-of-00026.safetensors", |
| "layers.26.attn.indexer.weights_proj.weight": "model-00016-of-00026.safetensors", |
| "layers.26.attn.indexer.wq_b.biases": "model-00016-of-00026.safetensors", |
| "layers.26.attn.indexer.wq_b.scales": "model-00016-of-00026.safetensors", |
| "layers.26.attn.indexer.wq_b.weight": "model-00016-of-00026.safetensors", |
| "layers.26.attn.kv_norm.weight": "model-00016-of-00026.safetensors", |
| "layers.26.attn.q_norm.weight": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wkv.biases": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wkv.scales": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wkv.weight": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.0.biases": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.0.scales": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.0.weight": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.1.biases": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.1.scales": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.1.weight": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.2.biases": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.2.scales": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.2.weight": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.3.biases": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.3.scales": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.3.weight": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.4.biases": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.4.scales": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.4.weight": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.5.biases": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.5.scales": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.5.weight": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.6.biases": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.6.scales": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.6.weight": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.7.biases": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.7.scales": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_a.7.weight": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_b.biases": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_b.scales": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wo_b.weight": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wq_a.biases": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wq_a.scales": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wq_a.weight": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wq_b.biases": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wq_b.scales": "model-00016-of-00026.safetensors", |
| "layers.26.attn.wq_b.weight": "model-00016-of-00026.safetensors", |
| "layers.26.attn_norm.weight": "model-00017-of-00026.safetensors", |
| "layers.26.ffn.experts.w1.biases": "model-00016-of-00026.safetensors", |
| "layers.26.ffn.experts.w1.scales": "model-00016-of-00026.safetensors", |
| "layers.26.ffn.experts.w1.weight": "model-00016-of-00026.safetensors", |
| "layers.26.ffn.experts.w2.biases": "model-00016-of-00026.safetensors", |
| "layers.26.ffn.experts.w2.scales": "model-00016-of-00026.safetensors", |
| "layers.26.ffn.experts.w2.weight": "model-00016-of-00026.safetensors", |
| "layers.26.ffn.experts.w3.biases": "model-00017-of-00026.safetensors", |
| "layers.26.ffn.experts.w3.scales": "model-00017-of-00026.safetensors", |
| "layers.26.ffn.experts.w3.weight": "model-00017-of-00026.safetensors", |
| "layers.26.ffn.gate.bias": "model-00016-of-00026.safetensors", |
| "layers.26.ffn.gate.weight": "model-00016-of-00026.safetensors", |
| "layers.26.ffn.shared_experts.w1.biases": "model-00017-of-00026.safetensors", |
| "layers.26.ffn.shared_experts.w1.scales": "model-00017-of-00026.safetensors", |
| "layers.26.ffn.shared_experts.w1.weight": "model-00017-of-00026.safetensors", |
| "layers.26.ffn.shared_experts.w2.biases": "model-00017-of-00026.safetensors", |
| "layers.26.ffn.shared_experts.w2.scales": "model-00017-of-00026.safetensors", |
| "layers.26.ffn.shared_experts.w2.weight": "model-00017-of-00026.safetensors", |
| "layers.26.ffn.shared_experts.w3.biases": "model-00017-of-00026.safetensors", |
| "layers.26.ffn.shared_experts.w3.scales": "model-00017-of-00026.safetensors", |
| "layers.26.ffn.shared_experts.w3.weight": "model-00017-of-00026.safetensors", |
| "layers.26.ffn_norm.weight": "model-00017-of-00026.safetensors", |
| "layers.26.hc_attn_base": "model-00017-of-00026.safetensors", |
| "layers.26.hc_attn_fn": "model-00017-of-00026.safetensors", |
| "layers.26.hc_attn_scale": "model-00017-of-00026.safetensors", |
| "layers.26.hc_ffn_base": "model-00017-of-00026.safetensors", |
| "layers.26.hc_ffn_fn": "model-00017-of-00026.safetensors", |
| "layers.26.hc_ffn_scale": "model-00017-of-00026.safetensors", |
| "layers.27.attn.attn_sink": "model-00017-of-00026.safetensors", |
| "layers.27.attn.compressor.ape": "model-00017-of-00026.safetensors", |
| "layers.27.attn.compressor.norm.weight": "model-00017-of-00026.safetensors", |
| "layers.27.attn.compressor.wgate.biases": "model-00017-of-00026.safetensors", |
| "layers.27.attn.compressor.wgate.scales": "model-00017-of-00026.safetensors", |
| "layers.27.attn.compressor.wgate.weight": "model-00017-of-00026.safetensors", |
| "layers.27.attn.compressor.wkv.biases": "model-00017-of-00026.safetensors", |
| "layers.27.attn.compressor.wkv.scales": "model-00017-of-00026.safetensors", |
| "layers.27.attn.compressor.wkv.weight": "model-00017-of-00026.safetensors", |
| "layers.27.attn.kv_norm.weight": "model-00017-of-00026.safetensors", |
| "layers.27.attn.q_norm.weight": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wkv.biases": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wkv.scales": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wkv.weight": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.0.biases": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.0.scales": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.0.weight": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.1.biases": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.1.scales": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.1.weight": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.2.biases": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.2.scales": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.2.weight": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.3.biases": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.3.scales": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.3.weight": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.4.biases": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.4.scales": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.4.weight": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.5.biases": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.5.scales": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.5.weight": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.6.biases": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.6.scales": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.6.weight": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.7.biases": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.7.scales": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_a.7.weight": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_b.biases": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_b.scales": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wo_b.weight": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wq_a.biases": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wq_a.scales": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wq_a.weight": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wq_b.biases": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wq_b.scales": "model-00017-of-00026.safetensors", |
| "layers.27.attn.wq_b.weight": "model-00017-of-00026.safetensors", |
| "layers.27.attn_norm.weight": "model-00017-of-00026.safetensors", |
| "layers.27.ffn.experts.w1.biases": "model-00017-of-00026.safetensors", |
| "layers.27.ffn.experts.w1.scales": "model-00017-of-00026.safetensors", |
| "layers.27.ffn.experts.w1.weight": "model-00017-of-00026.safetensors", |
| "layers.27.ffn.experts.w2.biases": "model-00017-of-00026.safetensors", |
| "layers.27.ffn.experts.w2.scales": "model-00017-of-00026.safetensors", |
| "layers.27.ffn.experts.w2.weight": "model-00017-of-00026.safetensors", |
| "layers.27.ffn.experts.w3.biases": "model-00017-of-00026.safetensors", |
| "layers.27.ffn.experts.w3.scales": "model-00017-of-00026.safetensors", |
| "layers.27.ffn.experts.w3.weight": "model-00017-of-00026.safetensors", |
| "layers.27.ffn.gate.bias": "model-00017-of-00026.safetensors", |
| "layers.27.ffn.gate.weight": "model-00017-of-00026.safetensors", |
| "layers.27.ffn.shared_experts.w1.biases": "model-00017-of-00026.safetensors", |
| "layers.27.ffn.shared_experts.w1.scales": "model-00017-of-00026.safetensors", |
| "layers.27.ffn.shared_experts.w1.weight": "model-00017-of-00026.safetensors", |
| "layers.27.ffn.shared_experts.w2.biases": "model-00017-of-00026.safetensors", |
| "layers.27.ffn.shared_experts.w2.scales": "model-00017-of-00026.safetensors", |
| "layers.27.ffn.shared_experts.w2.weight": "model-00017-of-00026.safetensors", |
| "layers.27.ffn.shared_experts.w3.biases": "model-00017-of-00026.safetensors", |
| "layers.27.ffn.shared_experts.w3.scales": "model-00017-of-00026.safetensors", |
| "layers.27.ffn.shared_experts.w3.weight": "model-00017-of-00026.safetensors", |
| "layers.27.ffn_norm.weight": "model-00017-of-00026.safetensors", |
| "layers.27.hc_attn_base": "model-00017-of-00026.safetensors", |
| "layers.27.hc_attn_fn": "model-00017-of-00026.safetensors", |
| "layers.27.hc_attn_scale": "model-00017-of-00026.safetensors", |
| "layers.27.hc_ffn_base": "model-00017-of-00026.safetensors", |
| "layers.27.hc_ffn_fn": "model-00017-of-00026.safetensors", |
| "layers.27.hc_ffn_scale": "model-00017-of-00026.safetensors", |
| "layers.28.attn.attn_sink": "model-00017-of-00026.safetensors", |
| "layers.28.attn.compressor.ape": "model-00017-of-00026.safetensors", |
| "layers.28.attn.compressor.norm.weight": "model-00017-of-00026.safetensors", |
| "layers.28.attn.compressor.wgate.biases": "model-00017-of-00026.safetensors", |
| "layers.28.attn.compressor.wgate.scales": "model-00017-of-00026.safetensors", |
| "layers.28.attn.compressor.wgate.weight": "model-00017-of-00026.safetensors", |
| "layers.28.attn.compressor.wkv.biases": "model-00017-of-00026.safetensors", |
| "layers.28.attn.compressor.wkv.scales": "model-00017-of-00026.safetensors", |
| "layers.28.attn.compressor.wkv.weight": "model-00017-of-00026.safetensors", |
| "layers.28.attn.indexer.compressor.ape": "model-00017-of-00026.safetensors", |
| "layers.28.attn.indexer.compressor.norm.weight": "model-00017-of-00026.safetensors", |
| "layers.28.attn.indexer.compressor.wgate.biases": "model-00017-of-00026.safetensors", |
| "layers.28.attn.indexer.compressor.wgate.scales": "model-00017-of-00026.safetensors", |
| "layers.28.attn.indexer.compressor.wgate.weight": "model-00017-of-00026.safetensors", |
| "layers.28.attn.indexer.compressor.wkv.biases": "model-00017-of-00026.safetensors", |
| "layers.28.attn.indexer.compressor.wkv.scales": "model-00017-of-00026.safetensors", |
| "layers.28.attn.indexer.compressor.wkv.weight": "model-00017-of-00026.safetensors", |
| "layers.28.attn.indexer.weights_proj.biases": "model-00017-of-00026.safetensors", |
| "layers.28.attn.indexer.weights_proj.scales": "model-00017-of-00026.safetensors", |
| "layers.28.attn.indexer.weights_proj.weight": "model-00017-of-00026.safetensors", |
| "layers.28.attn.indexer.wq_b.biases": "model-00017-of-00026.safetensors", |
| "layers.28.attn.indexer.wq_b.scales": "model-00017-of-00026.safetensors", |
| "layers.28.attn.indexer.wq_b.weight": "model-00017-of-00026.safetensors", |
| "layers.28.attn.kv_norm.weight": "model-00017-of-00026.safetensors", |
| "layers.28.attn.q_norm.weight": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wkv.biases": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wkv.scales": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wkv.weight": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.0.biases": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.0.scales": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.0.weight": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.1.biases": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.1.scales": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.1.weight": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.2.biases": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.2.scales": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.2.weight": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.3.biases": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.3.scales": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.3.weight": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.4.biases": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.4.scales": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.4.weight": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.5.biases": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.5.scales": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.5.weight": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.6.biases": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.6.scales": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.6.weight": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.7.biases": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.7.scales": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_a.7.weight": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_b.biases": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_b.scales": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wo_b.weight": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wq_a.biases": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wq_a.scales": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wq_a.weight": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wq_b.biases": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wq_b.scales": "model-00017-of-00026.safetensors", |
| "layers.28.attn.wq_b.weight": "model-00017-of-00026.safetensors", |
| "layers.28.attn_norm.weight": "model-00018-of-00026.safetensors", |
| "layers.28.ffn.experts.w1.biases": "model-00017-of-00026.safetensors", |
| "layers.28.ffn.experts.w1.scales": "model-00017-of-00026.safetensors", |
| "layers.28.ffn.experts.w1.weight": "model-00017-of-00026.safetensors", |
| "layers.28.ffn.experts.w2.biases": "model-00018-of-00026.safetensors", |
| "layers.28.ffn.experts.w2.scales": "model-00018-of-00026.safetensors", |
| "layers.28.ffn.experts.w2.weight": "model-00018-of-00026.safetensors", |
| "layers.28.ffn.experts.w3.biases": "model-00018-of-00026.safetensors", |
| "layers.28.ffn.experts.w3.scales": "model-00018-of-00026.safetensors", |
| "layers.28.ffn.experts.w3.weight": "model-00018-of-00026.safetensors", |
| "layers.28.ffn.gate.bias": "model-00017-of-00026.safetensors", |
| "layers.28.ffn.gate.weight": "model-00017-of-00026.safetensors", |
| "layers.28.ffn.shared_experts.w1.biases": "model-00018-of-00026.safetensors", |
| "layers.28.ffn.shared_experts.w1.scales": "model-00018-of-00026.safetensors", |
| "layers.28.ffn.shared_experts.w1.weight": "model-00018-of-00026.safetensors", |
| "layers.28.ffn.shared_experts.w2.biases": "model-00018-of-00026.safetensors", |
| "layers.28.ffn.shared_experts.w2.scales": "model-00018-of-00026.safetensors", |
| "layers.28.ffn.shared_experts.w2.weight": "model-00018-of-00026.safetensors", |
| "layers.28.ffn.shared_experts.w3.biases": "model-00018-of-00026.safetensors", |
| "layers.28.ffn.shared_experts.w3.scales": "model-00018-of-00026.safetensors", |
| "layers.28.ffn.shared_experts.w3.weight": "model-00018-of-00026.safetensors", |
| "layers.28.ffn_norm.weight": "model-00018-of-00026.safetensors", |
| "layers.28.hc_attn_base": "model-00018-of-00026.safetensors", |
| "layers.28.hc_attn_fn": "model-00018-of-00026.safetensors", |
| "layers.28.hc_attn_scale": "model-00018-of-00026.safetensors", |
| "layers.28.hc_ffn_base": "model-00018-of-00026.safetensors", |
| "layers.28.hc_ffn_fn": "model-00018-of-00026.safetensors", |
| "layers.28.hc_ffn_scale": "model-00018-of-00026.safetensors", |
| "layers.29.attn.attn_sink": "model-00018-of-00026.safetensors", |
| "layers.29.attn.compressor.ape": "model-00018-of-00026.safetensors", |
| "layers.29.attn.compressor.norm.weight": "model-00018-of-00026.safetensors", |
| "layers.29.attn.compressor.wgate.biases": "model-00018-of-00026.safetensors", |
| "layers.29.attn.compressor.wgate.scales": "model-00018-of-00026.safetensors", |
| "layers.29.attn.compressor.wgate.weight": "model-00018-of-00026.safetensors", |
| "layers.29.attn.compressor.wkv.biases": "model-00018-of-00026.safetensors", |
| "layers.29.attn.compressor.wkv.scales": "model-00018-of-00026.safetensors", |
| "layers.29.attn.compressor.wkv.weight": "model-00018-of-00026.safetensors", |
| "layers.29.attn.kv_norm.weight": "model-00018-of-00026.safetensors", |
| "layers.29.attn.q_norm.weight": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wkv.biases": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wkv.scales": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wkv.weight": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.0.biases": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.0.scales": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.0.weight": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.1.biases": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.1.scales": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.1.weight": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.2.biases": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.2.scales": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.2.weight": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.3.biases": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.3.scales": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.3.weight": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.4.biases": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.4.scales": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.4.weight": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.5.biases": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.5.scales": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.5.weight": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.6.biases": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.6.scales": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.6.weight": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.7.biases": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.7.scales": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_a.7.weight": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_b.biases": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_b.scales": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wo_b.weight": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wq_a.biases": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wq_a.scales": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wq_a.weight": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wq_b.biases": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wq_b.scales": "model-00018-of-00026.safetensors", |
| "layers.29.attn.wq_b.weight": "model-00018-of-00026.safetensors", |
| "layers.29.attn_norm.weight": "model-00018-of-00026.safetensors", |
| "layers.29.ffn.experts.w1.biases": "model-00018-of-00026.safetensors", |
| "layers.29.ffn.experts.w1.scales": "model-00018-of-00026.safetensors", |
| "layers.29.ffn.experts.w1.weight": "model-00018-of-00026.safetensors", |
| "layers.29.ffn.experts.w2.biases": "model-00018-of-00026.safetensors", |
| "layers.29.ffn.experts.w2.scales": "model-00018-of-00026.safetensors", |
| "layers.29.ffn.experts.w2.weight": "model-00018-of-00026.safetensors", |
| "layers.29.ffn.experts.w3.biases": "model-00018-of-00026.safetensors", |
| "layers.29.ffn.experts.w3.scales": "model-00018-of-00026.safetensors", |
| "layers.29.ffn.experts.w3.weight": "model-00018-of-00026.safetensors", |
| "layers.29.ffn.gate.bias": "model-00018-of-00026.safetensors", |
| "layers.29.ffn.gate.weight": "model-00018-of-00026.safetensors", |
| "layers.29.ffn.shared_experts.w1.biases": "model-00018-of-00026.safetensors", |
| "layers.29.ffn.shared_experts.w1.scales": "model-00018-of-00026.safetensors", |
| "layers.29.ffn.shared_experts.w1.weight": "model-00018-of-00026.safetensors", |
| "layers.29.ffn.shared_experts.w2.biases": "model-00018-of-00026.safetensors", |
| "layers.29.ffn.shared_experts.w2.scales": "model-00018-of-00026.safetensors", |
| "layers.29.ffn.shared_experts.w2.weight": "model-00018-of-00026.safetensors", |
| "layers.29.ffn.shared_experts.w3.biases": "model-00018-of-00026.safetensors", |
| "layers.29.ffn.shared_experts.w3.scales": "model-00018-of-00026.safetensors", |
| "layers.29.ffn.shared_experts.w3.weight": "model-00018-of-00026.safetensors", |
| "layers.29.ffn_norm.weight": "model-00018-of-00026.safetensors", |
| "layers.29.hc_attn_base": "model-00018-of-00026.safetensors", |
| "layers.29.hc_attn_fn": "model-00018-of-00026.safetensors", |
| "layers.29.hc_attn_scale": "model-00018-of-00026.safetensors", |
| "layers.29.hc_ffn_base": "model-00018-of-00026.safetensors", |
| "layers.29.hc_ffn_fn": "model-00018-of-00026.safetensors", |
| "layers.29.hc_ffn_scale": "model-00018-of-00026.safetensors", |
| "layers.3.attn.attn_sink": "model-00002-of-00026.safetensors", |
| "layers.3.attn.compressor.ape": "model-00002-of-00026.safetensors", |
| "layers.3.attn.compressor.norm.weight": "model-00002-of-00026.safetensors", |
| "layers.3.attn.compressor.wgate.biases": "model-00002-of-00026.safetensors", |
| "layers.3.attn.compressor.wgate.scales": "model-00002-of-00026.safetensors", |
| "layers.3.attn.compressor.wgate.weight": "model-00002-of-00026.safetensors", |
| "layers.3.attn.compressor.wkv.biases": "model-00002-of-00026.safetensors", |
| "layers.3.attn.compressor.wkv.scales": "model-00002-of-00026.safetensors", |
| "layers.3.attn.compressor.wkv.weight": "model-00002-of-00026.safetensors", |
| "layers.3.attn.kv_norm.weight": "model-00002-of-00026.safetensors", |
| "layers.3.attn.q_norm.weight": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wkv.biases": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wkv.scales": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wkv.weight": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.0.biases": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.0.scales": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.0.weight": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.1.biases": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.1.scales": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.1.weight": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.2.biases": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.2.scales": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.2.weight": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.3.biases": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.3.scales": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.3.weight": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.4.biases": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.4.scales": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.4.weight": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.5.biases": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.5.scales": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.5.weight": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.6.biases": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.6.scales": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.6.weight": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.7.biases": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.7.scales": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_a.7.weight": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_b.biases": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_b.scales": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wo_b.weight": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wq_a.biases": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wq_a.scales": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wq_a.weight": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wq_b.biases": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wq_b.scales": "model-00002-of-00026.safetensors", |
| "layers.3.attn.wq_b.weight": "model-00002-of-00026.safetensors", |
| "layers.3.attn_norm.weight": "model-00003-of-00026.safetensors", |
| "layers.3.ffn.experts.w1.biases": "model-00002-of-00026.safetensors", |
| "layers.3.ffn.experts.w1.scales": "model-00002-of-00026.safetensors", |
| "layers.3.ffn.experts.w1.weight": "model-00002-of-00026.safetensors", |
| "layers.3.ffn.experts.w2.biases": "model-00003-of-00026.safetensors", |
| "layers.3.ffn.experts.w2.scales": "model-00003-of-00026.safetensors", |
| "layers.3.ffn.experts.w2.weight": "model-00003-of-00026.safetensors", |
| "layers.3.ffn.experts.w3.biases": "model-00003-of-00026.safetensors", |
| "layers.3.ffn.experts.w3.scales": "model-00003-of-00026.safetensors", |
| "layers.3.ffn.experts.w3.weight": "model-00003-of-00026.safetensors", |
| "layers.3.ffn.gate.bias": "model-00002-of-00026.safetensors", |
| "layers.3.ffn.gate.weight": "model-00002-of-00026.safetensors", |
| "layers.3.ffn.shared_experts.w1.biases": "model-00003-of-00026.safetensors", |
| "layers.3.ffn.shared_experts.w1.scales": "model-00003-of-00026.safetensors", |
| "layers.3.ffn.shared_experts.w1.weight": "model-00003-of-00026.safetensors", |
| "layers.3.ffn.shared_experts.w2.biases": "model-00003-of-00026.safetensors", |
| "layers.3.ffn.shared_experts.w2.scales": "model-00003-of-00026.safetensors", |
| "layers.3.ffn.shared_experts.w2.weight": "model-00003-of-00026.safetensors", |
| "layers.3.ffn.shared_experts.w3.biases": "model-00003-of-00026.safetensors", |
| "layers.3.ffn.shared_experts.w3.scales": "model-00003-of-00026.safetensors", |
| "layers.3.ffn.shared_experts.w3.weight": "model-00003-of-00026.safetensors", |
| "layers.3.ffn_norm.weight": "model-00003-of-00026.safetensors", |
| "layers.3.hc_attn_base": "model-00003-of-00026.safetensors", |
| "layers.3.hc_attn_fn": "model-00003-of-00026.safetensors", |
| "layers.3.hc_attn_scale": "model-00003-of-00026.safetensors", |
| "layers.3.hc_ffn_base": "model-00003-of-00026.safetensors", |
| "layers.3.hc_ffn_fn": "model-00003-of-00026.safetensors", |
| "layers.3.hc_ffn_scale": "model-00003-of-00026.safetensors", |
| "layers.30.attn.attn_sink": "model-00018-of-00026.safetensors", |
| "layers.30.attn.compressor.ape": "model-00018-of-00026.safetensors", |
| "layers.30.attn.compressor.norm.weight": "model-00018-of-00026.safetensors", |
| "layers.30.attn.compressor.wgate.biases": "model-00018-of-00026.safetensors", |
| "layers.30.attn.compressor.wgate.scales": "model-00018-of-00026.safetensors", |
| "layers.30.attn.compressor.wgate.weight": "model-00018-of-00026.safetensors", |
| "layers.30.attn.compressor.wkv.biases": "model-00018-of-00026.safetensors", |
| "layers.30.attn.compressor.wkv.scales": "model-00018-of-00026.safetensors", |
| "layers.30.attn.compressor.wkv.weight": "model-00018-of-00026.safetensors", |
| "layers.30.attn.indexer.compressor.ape": "model-00018-of-00026.safetensors", |
| "layers.30.attn.indexer.compressor.norm.weight": "model-00018-of-00026.safetensors", |
| "layers.30.attn.indexer.compressor.wgate.biases": "model-00018-of-00026.safetensors", |
| "layers.30.attn.indexer.compressor.wgate.scales": "model-00018-of-00026.safetensors", |
| "layers.30.attn.indexer.compressor.wgate.weight": "model-00018-of-00026.safetensors", |
| "layers.30.attn.indexer.compressor.wkv.biases": "model-00018-of-00026.safetensors", |
| "layers.30.attn.indexer.compressor.wkv.scales": "model-00018-of-00026.safetensors", |
| "layers.30.attn.indexer.compressor.wkv.weight": "model-00018-of-00026.safetensors", |
| "layers.30.attn.indexer.weights_proj.biases": "model-00018-of-00026.safetensors", |
| "layers.30.attn.indexer.weights_proj.scales": "model-00018-of-00026.safetensors", |
| "layers.30.attn.indexer.weights_proj.weight": "model-00018-of-00026.safetensors", |
| "layers.30.attn.indexer.wq_b.biases": "model-00018-of-00026.safetensors", |
| "layers.30.attn.indexer.wq_b.scales": "model-00018-of-00026.safetensors", |
| "layers.30.attn.indexer.wq_b.weight": "model-00018-of-00026.safetensors", |
| "layers.30.attn.kv_norm.weight": "model-00018-of-00026.safetensors", |
| "layers.30.attn.q_norm.weight": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wkv.biases": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wkv.scales": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wkv.weight": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.0.biases": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.0.scales": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.0.weight": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.1.biases": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.1.scales": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.1.weight": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.2.biases": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.2.scales": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.2.weight": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.3.biases": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.3.scales": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.3.weight": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.4.biases": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.4.scales": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.4.weight": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.5.biases": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.5.scales": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.5.weight": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.6.biases": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.6.scales": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.6.weight": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.7.biases": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.7.scales": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_a.7.weight": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_b.biases": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_b.scales": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wo_b.weight": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wq_a.biases": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wq_a.scales": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wq_a.weight": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wq_b.biases": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wq_b.scales": "model-00018-of-00026.safetensors", |
| "layers.30.attn.wq_b.weight": "model-00018-of-00026.safetensors", |
| "layers.30.attn_norm.weight": "model-00019-of-00026.safetensors", |
| "layers.30.ffn.experts.w1.biases": "model-00019-of-00026.safetensors", |
| "layers.30.ffn.experts.w1.scales": "model-00019-of-00026.safetensors", |
| "layers.30.ffn.experts.w1.weight": "model-00019-of-00026.safetensors", |
| "layers.30.ffn.experts.w2.biases": "model-00019-of-00026.safetensors", |
| "layers.30.ffn.experts.w2.scales": "model-00019-of-00026.safetensors", |
| "layers.30.ffn.experts.w2.weight": "model-00019-of-00026.safetensors", |
| "layers.30.ffn.experts.w3.biases": "model-00019-of-00026.safetensors", |
| "layers.30.ffn.experts.w3.scales": "model-00019-of-00026.safetensors", |
| "layers.30.ffn.experts.w3.weight": "model-00019-of-00026.safetensors", |
| "layers.30.ffn.gate.bias": "model-00018-of-00026.safetensors", |
| "layers.30.ffn.gate.weight": "model-00018-of-00026.safetensors", |
| "layers.30.ffn.shared_experts.w1.biases": "model-00019-of-00026.safetensors", |
| "layers.30.ffn.shared_experts.w1.scales": "model-00019-of-00026.safetensors", |
| "layers.30.ffn.shared_experts.w1.weight": "model-00019-of-00026.safetensors", |
| "layers.30.ffn.shared_experts.w2.biases": "model-00019-of-00026.safetensors", |
| "layers.30.ffn.shared_experts.w2.scales": "model-00019-of-00026.safetensors", |
| "layers.30.ffn.shared_experts.w2.weight": "model-00019-of-00026.safetensors", |
| "layers.30.ffn.shared_experts.w3.biases": "model-00019-of-00026.safetensors", |
| "layers.30.ffn.shared_experts.w3.scales": "model-00019-of-00026.safetensors", |
| "layers.30.ffn.shared_experts.w3.weight": "model-00019-of-00026.safetensors", |
| "layers.30.ffn_norm.weight": "model-00019-of-00026.safetensors", |
| "layers.30.hc_attn_base": "model-00019-of-00026.safetensors", |
| "layers.30.hc_attn_fn": "model-00019-of-00026.safetensors", |
| "layers.30.hc_attn_scale": "model-00019-of-00026.safetensors", |
| "layers.30.hc_ffn_base": "model-00019-of-00026.safetensors", |
| "layers.30.hc_ffn_fn": "model-00019-of-00026.safetensors", |
| "layers.30.hc_ffn_scale": "model-00019-of-00026.safetensors", |
| "layers.31.attn.attn_sink": "model-00019-of-00026.safetensors", |
| "layers.31.attn.compressor.ape": "model-00019-of-00026.safetensors", |
| "layers.31.attn.compressor.norm.weight": "model-00019-of-00026.safetensors", |
| "layers.31.attn.compressor.wgate.biases": "model-00019-of-00026.safetensors", |
| "layers.31.attn.compressor.wgate.scales": "model-00019-of-00026.safetensors", |
| "layers.31.attn.compressor.wgate.weight": "model-00019-of-00026.safetensors", |
| "layers.31.attn.compressor.wkv.biases": "model-00019-of-00026.safetensors", |
| "layers.31.attn.compressor.wkv.scales": "model-00019-of-00026.safetensors", |
| "layers.31.attn.compressor.wkv.weight": "model-00019-of-00026.safetensors", |
| "layers.31.attn.kv_norm.weight": "model-00019-of-00026.safetensors", |
| "layers.31.attn.q_norm.weight": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wkv.biases": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wkv.scales": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wkv.weight": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.0.biases": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.0.scales": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.0.weight": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.1.biases": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.1.scales": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.1.weight": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.2.biases": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.2.scales": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.2.weight": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.3.biases": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.3.scales": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.3.weight": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.4.biases": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.4.scales": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.4.weight": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.5.biases": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.5.scales": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.5.weight": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.6.biases": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.6.scales": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.6.weight": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.7.biases": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.7.scales": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_a.7.weight": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_b.biases": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_b.scales": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wo_b.weight": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wq_a.biases": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wq_a.scales": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wq_a.weight": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wq_b.biases": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wq_b.scales": "model-00019-of-00026.safetensors", |
| "layers.31.attn.wq_b.weight": "model-00019-of-00026.safetensors", |
| "layers.31.attn_norm.weight": "model-00020-of-00026.safetensors", |
| "layers.31.ffn.experts.w1.biases": "model-00019-of-00026.safetensors", |
| "layers.31.ffn.experts.w1.scales": "model-00019-of-00026.safetensors", |
| "layers.31.ffn.experts.w1.weight": "model-00019-of-00026.safetensors", |
| "layers.31.ffn.experts.w2.biases": "model-00019-of-00026.safetensors", |
| "layers.31.ffn.experts.w2.scales": "model-00019-of-00026.safetensors", |
| "layers.31.ffn.experts.w2.weight": "model-00019-of-00026.safetensors", |
| "layers.31.ffn.experts.w3.biases": "model-00020-of-00026.safetensors", |
| "layers.31.ffn.experts.w3.scales": "model-00020-of-00026.safetensors", |
| "layers.31.ffn.experts.w3.weight": "model-00020-of-00026.safetensors", |
| "layers.31.ffn.gate.bias": "model-00019-of-00026.safetensors", |
| "layers.31.ffn.gate.weight": "model-00019-of-00026.safetensors", |
| "layers.31.ffn.shared_experts.w1.biases": "model-00020-of-00026.safetensors", |
| "layers.31.ffn.shared_experts.w1.scales": "model-00020-of-00026.safetensors", |
| "layers.31.ffn.shared_experts.w1.weight": "model-00020-of-00026.safetensors", |
| "layers.31.ffn.shared_experts.w2.biases": "model-00020-of-00026.safetensors", |
| "layers.31.ffn.shared_experts.w2.scales": "model-00020-of-00026.safetensors", |
| "layers.31.ffn.shared_experts.w2.weight": "model-00020-of-00026.safetensors", |
| "layers.31.ffn.shared_experts.w3.biases": "model-00020-of-00026.safetensors", |
| "layers.31.ffn.shared_experts.w3.scales": "model-00020-of-00026.safetensors", |
| "layers.31.ffn.shared_experts.w3.weight": "model-00020-of-00026.safetensors", |
| "layers.31.ffn_norm.weight": "model-00020-of-00026.safetensors", |
| "layers.31.hc_attn_base": "model-00020-of-00026.safetensors", |
| "layers.31.hc_attn_fn": "model-00020-of-00026.safetensors", |
| "layers.31.hc_attn_scale": "model-00020-of-00026.safetensors", |
| "layers.31.hc_ffn_base": "model-00020-of-00026.safetensors", |
| "layers.31.hc_ffn_fn": "model-00020-of-00026.safetensors", |
| "layers.31.hc_ffn_scale": "model-00020-of-00026.safetensors", |
| "layers.32.attn.attn_sink": "model-00020-of-00026.safetensors", |
| "layers.32.attn.compressor.ape": "model-00020-of-00026.safetensors", |
| "layers.32.attn.compressor.norm.weight": "model-00020-of-00026.safetensors", |
| "layers.32.attn.compressor.wgate.biases": "model-00020-of-00026.safetensors", |
| "layers.32.attn.compressor.wgate.scales": "model-00020-of-00026.safetensors", |
| "layers.32.attn.compressor.wgate.weight": "model-00020-of-00026.safetensors", |
| "layers.32.attn.compressor.wkv.biases": "model-00020-of-00026.safetensors", |
| "layers.32.attn.compressor.wkv.scales": "model-00020-of-00026.safetensors", |
| "layers.32.attn.compressor.wkv.weight": "model-00020-of-00026.safetensors", |
| "layers.32.attn.indexer.compressor.ape": "model-00020-of-00026.safetensors", |
| "layers.32.attn.indexer.compressor.norm.weight": "model-00020-of-00026.safetensors", |
| "layers.32.attn.indexer.compressor.wgate.biases": "model-00020-of-00026.safetensors", |
| "layers.32.attn.indexer.compressor.wgate.scales": "model-00020-of-00026.safetensors", |
| "layers.32.attn.indexer.compressor.wgate.weight": "model-00020-of-00026.safetensors", |
| "layers.32.attn.indexer.compressor.wkv.biases": "model-00020-of-00026.safetensors", |
| "layers.32.attn.indexer.compressor.wkv.scales": "model-00020-of-00026.safetensors", |
| "layers.32.attn.indexer.compressor.wkv.weight": "model-00020-of-00026.safetensors", |
| "layers.32.attn.indexer.weights_proj.biases": "model-00020-of-00026.safetensors", |
| "layers.32.attn.indexer.weights_proj.scales": "model-00020-of-00026.safetensors", |
| "layers.32.attn.indexer.weights_proj.weight": "model-00020-of-00026.safetensors", |
| "layers.32.attn.indexer.wq_b.biases": "model-00020-of-00026.safetensors", |
| "layers.32.attn.indexer.wq_b.scales": "model-00020-of-00026.safetensors", |
| "layers.32.attn.indexer.wq_b.weight": "model-00020-of-00026.safetensors", |
| "layers.32.attn.kv_norm.weight": "model-00020-of-00026.safetensors", |
| "layers.32.attn.q_norm.weight": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wkv.biases": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wkv.scales": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wkv.weight": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.0.biases": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.0.scales": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.0.weight": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.1.biases": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.1.scales": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.1.weight": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.2.biases": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.2.scales": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.2.weight": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.3.biases": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.3.scales": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.3.weight": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.4.biases": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.4.scales": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.4.weight": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.5.biases": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.5.scales": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.5.weight": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.6.biases": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.6.scales": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.6.weight": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.7.biases": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.7.scales": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_a.7.weight": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_b.biases": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_b.scales": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wo_b.weight": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wq_a.biases": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wq_a.scales": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wq_a.weight": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wq_b.biases": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wq_b.scales": "model-00020-of-00026.safetensors", |
| "layers.32.attn.wq_b.weight": "model-00020-of-00026.safetensors", |
| "layers.32.attn_norm.weight": "model-00020-of-00026.safetensors", |
| "layers.32.ffn.experts.w1.biases": "model-00020-of-00026.safetensors", |
| "layers.32.ffn.experts.w1.scales": "model-00020-of-00026.safetensors", |
| "layers.32.ffn.experts.w1.weight": "model-00020-of-00026.safetensors", |
| "layers.32.ffn.experts.w2.biases": "model-00020-of-00026.safetensors", |
| "layers.32.ffn.experts.w2.scales": "model-00020-of-00026.safetensors", |
| "layers.32.ffn.experts.w2.weight": "model-00020-of-00026.safetensors", |
| "layers.32.ffn.experts.w3.biases": "model-00020-of-00026.safetensors", |
| "layers.32.ffn.experts.w3.scales": "model-00020-of-00026.safetensors", |
| "layers.32.ffn.experts.w3.weight": "model-00020-of-00026.safetensors", |
| "layers.32.ffn.gate.bias": "model-00020-of-00026.safetensors", |
| "layers.32.ffn.gate.weight": "model-00020-of-00026.safetensors", |
| "layers.32.ffn.shared_experts.w1.biases": "model-00020-of-00026.safetensors", |
| "layers.32.ffn.shared_experts.w1.scales": "model-00020-of-00026.safetensors", |
| "layers.32.ffn.shared_experts.w1.weight": "model-00020-of-00026.safetensors", |
| "layers.32.ffn.shared_experts.w2.biases": "model-00020-of-00026.safetensors", |
| "layers.32.ffn.shared_experts.w2.scales": "model-00020-of-00026.safetensors", |
| "layers.32.ffn.shared_experts.w2.weight": "model-00020-of-00026.safetensors", |
| "layers.32.ffn.shared_experts.w3.biases": "model-00020-of-00026.safetensors", |
| "layers.32.ffn.shared_experts.w3.scales": "model-00020-of-00026.safetensors", |
| "layers.32.ffn.shared_experts.w3.weight": "model-00020-of-00026.safetensors", |
| "layers.32.ffn_norm.weight": "model-00020-of-00026.safetensors", |
| "layers.32.hc_attn_base": "model-00020-of-00026.safetensors", |
| "layers.32.hc_attn_fn": "model-00020-of-00026.safetensors", |
| "layers.32.hc_attn_scale": "model-00020-of-00026.safetensors", |
| "layers.32.hc_ffn_base": "model-00020-of-00026.safetensors", |
| "layers.32.hc_ffn_fn": "model-00020-of-00026.safetensors", |
| "layers.32.hc_ffn_scale": "model-00020-of-00026.safetensors", |
| "layers.33.attn.attn_sink": "model-00020-of-00026.safetensors", |
| "layers.33.attn.compressor.ape": "model-00020-of-00026.safetensors", |
| "layers.33.attn.compressor.norm.weight": "model-00020-of-00026.safetensors", |
| "layers.33.attn.compressor.wgate.biases": "model-00020-of-00026.safetensors", |
| "layers.33.attn.compressor.wgate.scales": "model-00020-of-00026.safetensors", |
| "layers.33.attn.compressor.wgate.weight": "model-00020-of-00026.safetensors", |
| "layers.33.attn.compressor.wkv.biases": "model-00020-of-00026.safetensors", |
| "layers.33.attn.compressor.wkv.scales": "model-00020-of-00026.safetensors", |
| "layers.33.attn.compressor.wkv.weight": "model-00020-of-00026.safetensors", |
| "layers.33.attn.kv_norm.weight": "model-00020-of-00026.safetensors", |
| "layers.33.attn.q_norm.weight": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wkv.biases": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wkv.scales": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wkv.weight": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.0.biases": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.0.scales": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.0.weight": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.1.biases": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.1.scales": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.1.weight": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.2.biases": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.2.scales": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.2.weight": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.3.biases": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.3.scales": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.3.weight": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.4.biases": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.4.scales": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.4.weight": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.5.biases": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.5.scales": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.5.weight": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.6.biases": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.6.scales": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.6.weight": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.7.biases": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.7.scales": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_a.7.weight": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_b.biases": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_b.scales": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wo_b.weight": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wq_a.biases": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wq_a.scales": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wq_a.weight": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wq_b.biases": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wq_b.scales": "model-00020-of-00026.safetensors", |
| "layers.33.attn.wq_b.weight": "model-00020-of-00026.safetensors", |
| "layers.33.attn_norm.weight": "model-00021-of-00026.safetensors", |
| "layers.33.ffn.experts.w1.biases": "model-00020-of-00026.safetensors", |
| "layers.33.ffn.experts.w1.scales": "model-00020-of-00026.safetensors", |
| "layers.33.ffn.experts.w1.weight": "model-00020-of-00026.safetensors", |
| "layers.33.ffn.experts.w2.biases": "model-00021-of-00026.safetensors", |
| "layers.33.ffn.experts.w2.scales": "model-00021-of-00026.safetensors", |
| "layers.33.ffn.experts.w2.weight": "model-00021-of-00026.safetensors", |
| "layers.33.ffn.experts.w3.biases": "model-00021-of-00026.safetensors", |
| "layers.33.ffn.experts.w3.scales": "model-00021-of-00026.safetensors", |
| "layers.33.ffn.experts.w3.weight": "model-00021-of-00026.safetensors", |
| "layers.33.ffn.gate.bias": "model-00020-of-00026.safetensors", |
| "layers.33.ffn.gate.weight": "model-00020-of-00026.safetensors", |
| "layers.33.ffn.shared_experts.w1.biases": "model-00021-of-00026.safetensors", |
| "layers.33.ffn.shared_experts.w1.scales": "model-00021-of-00026.safetensors", |
| "layers.33.ffn.shared_experts.w1.weight": "model-00021-of-00026.safetensors", |
| "layers.33.ffn.shared_experts.w2.biases": "model-00021-of-00026.safetensors", |
| "layers.33.ffn.shared_experts.w2.scales": "model-00021-of-00026.safetensors", |
| "layers.33.ffn.shared_experts.w2.weight": "model-00021-of-00026.safetensors", |
| "layers.33.ffn.shared_experts.w3.biases": "model-00021-of-00026.safetensors", |
| "layers.33.ffn.shared_experts.w3.scales": "model-00021-of-00026.safetensors", |
| "layers.33.ffn.shared_experts.w3.weight": "model-00021-of-00026.safetensors", |
| "layers.33.ffn_norm.weight": "model-00021-of-00026.safetensors", |
| "layers.33.hc_attn_base": "model-00021-of-00026.safetensors", |
| "layers.33.hc_attn_fn": "model-00021-of-00026.safetensors", |
| "layers.33.hc_attn_scale": "model-00021-of-00026.safetensors", |
| "layers.33.hc_ffn_base": "model-00021-of-00026.safetensors", |
| "layers.33.hc_ffn_fn": "model-00021-of-00026.safetensors", |
| "layers.33.hc_ffn_scale": "model-00021-of-00026.safetensors", |
| "layers.34.attn.attn_sink": "model-00021-of-00026.safetensors", |
| "layers.34.attn.compressor.ape": "model-00021-of-00026.safetensors", |
| "layers.34.attn.compressor.norm.weight": "model-00021-of-00026.safetensors", |
| "layers.34.attn.compressor.wgate.biases": "model-00021-of-00026.safetensors", |
| "layers.34.attn.compressor.wgate.scales": "model-00021-of-00026.safetensors", |
| "layers.34.attn.compressor.wgate.weight": "model-00021-of-00026.safetensors", |
| "layers.34.attn.compressor.wkv.biases": "model-00021-of-00026.safetensors", |
| "layers.34.attn.compressor.wkv.scales": "model-00021-of-00026.safetensors", |
| "layers.34.attn.compressor.wkv.weight": "model-00021-of-00026.safetensors", |
| "layers.34.attn.indexer.compressor.ape": "model-00021-of-00026.safetensors", |
| "layers.34.attn.indexer.compressor.norm.weight": "model-00021-of-00026.safetensors", |
| "layers.34.attn.indexer.compressor.wgate.biases": "model-00021-of-00026.safetensors", |
| "layers.34.attn.indexer.compressor.wgate.scales": "model-00021-of-00026.safetensors", |
| "layers.34.attn.indexer.compressor.wgate.weight": "model-00021-of-00026.safetensors", |
| "layers.34.attn.indexer.compressor.wkv.biases": "model-00021-of-00026.safetensors", |
| "layers.34.attn.indexer.compressor.wkv.scales": "model-00021-of-00026.safetensors", |
| "layers.34.attn.indexer.compressor.wkv.weight": "model-00021-of-00026.safetensors", |
| "layers.34.attn.indexer.weights_proj.biases": "model-00021-of-00026.safetensors", |
| "layers.34.attn.indexer.weights_proj.scales": "model-00021-of-00026.safetensors", |
| "layers.34.attn.indexer.weights_proj.weight": "model-00021-of-00026.safetensors", |
| "layers.34.attn.indexer.wq_b.biases": "model-00021-of-00026.safetensors", |
| "layers.34.attn.indexer.wq_b.scales": "model-00021-of-00026.safetensors", |
| "layers.34.attn.indexer.wq_b.weight": "model-00021-of-00026.safetensors", |
| "layers.34.attn.kv_norm.weight": "model-00021-of-00026.safetensors", |
| "layers.34.attn.q_norm.weight": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wkv.biases": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wkv.scales": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wkv.weight": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.0.biases": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.0.scales": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.0.weight": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.1.biases": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.1.scales": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.1.weight": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.2.biases": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.2.scales": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.2.weight": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.3.biases": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.3.scales": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.3.weight": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.4.biases": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.4.scales": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.4.weight": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.5.biases": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.5.scales": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.5.weight": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.6.biases": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.6.scales": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.6.weight": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.7.biases": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.7.scales": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_a.7.weight": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_b.biases": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_b.scales": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wo_b.weight": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wq_a.biases": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wq_a.scales": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wq_a.weight": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wq_b.biases": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wq_b.scales": "model-00021-of-00026.safetensors", |
| "layers.34.attn.wq_b.weight": "model-00021-of-00026.safetensors", |
| "layers.34.attn_norm.weight": "model-00021-of-00026.safetensors", |
| "layers.34.ffn.experts.w1.biases": "model-00021-of-00026.safetensors", |
| "layers.34.ffn.experts.w1.scales": "model-00021-of-00026.safetensors", |
| "layers.34.ffn.experts.w1.weight": "model-00021-of-00026.safetensors", |
| "layers.34.ffn.experts.w2.biases": "model-00021-of-00026.safetensors", |
| "layers.34.ffn.experts.w2.scales": "model-00021-of-00026.safetensors", |
| "layers.34.ffn.experts.w2.weight": "model-00021-of-00026.safetensors", |
| "layers.34.ffn.experts.w3.biases": "model-00021-of-00026.safetensors", |
| "layers.34.ffn.experts.w3.scales": "model-00021-of-00026.safetensors", |
| "layers.34.ffn.experts.w3.weight": "model-00021-of-00026.safetensors", |
| "layers.34.ffn.gate.bias": "model-00021-of-00026.safetensors", |
| "layers.34.ffn.gate.weight": "model-00021-of-00026.safetensors", |
| "layers.34.ffn.shared_experts.w1.biases": "model-00021-of-00026.safetensors", |
| "layers.34.ffn.shared_experts.w1.scales": "model-00021-of-00026.safetensors", |
| "layers.34.ffn.shared_experts.w1.weight": "model-00021-of-00026.safetensors", |
| "layers.34.ffn.shared_experts.w2.biases": "model-00021-of-00026.safetensors", |
| "layers.34.ffn.shared_experts.w2.scales": "model-00021-of-00026.safetensors", |
| "layers.34.ffn.shared_experts.w2.weight": "model-00021-of-00026.safetensors", |
| "layers.34.ffn.shared_experts.w3.biases": "model-00021-of-00026.safetensors", |
| "layers.34.ffn.shared_experts.w3.scales": "model-00021-of-00026.safetensors", |
| "layers.34.ffn.shared_experts.w3.weight": "model-00021-of-00026.safetensors", |
| "layers.34.ffn_norm.weight": "model-00021-of-00026.safetensors", |
| "layers.34.hc_attn_base": "model-00021-of-00026.safetensors", |
| "layers.34.hc_attn_fn": "model-00021-of-00026.safetensors", |
| "layers.34.hc_attn_scale": "model-00021-of-00026.safetensors", |
| "layers.34.hc_ffn_base": "model-00021-of-00026.safetensors", |
| "layers.34.hc_ffn_fn": "model-00021-of-00026.safetensors", |
| "layers.34.hc_ffn_scale": "model-00021-of-00026.safetensors", |
| "layers.35.attn.attn_sink": "model-00021-of-00026.safetensors", |
| "layers.35.attn.compressor.ape": "model-00021-of-00026.safetensors", |
| "layers.35.attn.compressor.norm.weight": "model-00021-of-00026.safetensors", |
| "layers.35.attn.compressor.wgate.biases": "model-00021-of-00026.safetensors", |
| "layers.35.attn.compressor.wgate.scales": "model-00021-of-00026.safetensors", |
| "layers.35.attn.compressor.wgate.weight": "model-00021-of-00026.safetensors", |
| "layers.35.attn.compressor.wkv.biases": "model-00021-of-00026.safetensors", |
| "layers.35.attn.compressor.wkv.scales": "model-00021-of-00026.safetensors", |
| "layers.35.attn.compressor.wkv.weight": "model-00021-of-00026.safetensors", |
| "layers.35.attn.kv_norm.weight": "model-00021-of-00026.safetensors", |
| "layers.35.attn.q_norm.weight": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wkv.biases": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wkv.scales": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wkv.weight": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.0.biases": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.0.scales": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.0.weight": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.1.biases": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.1.scales": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.1.weight": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.2.biases": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.2.scales": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.2.weight": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.3.biases": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.3.scales": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.3.weight": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.4.biases": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.4.scales": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.4.weight": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.5.biases": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.5.scales": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.5.weight": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.6.biases": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.6.scales": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.6.weight": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.7.biases": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.7.scales": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_a.7.weight": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_b.biases": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_b.scales": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wo_b.weight": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wq_a.biases": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wq_a.scales": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wq_a.weight": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wq_b.biases": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wq_b.scales": "model-00021-of-00026.safetensors", |
| "layers.35.attn.wq_b.weight": "model-00021-of-00026.safetensors", |
| "layers.35.attn_norm.weight": "model-00022-of-00026.safetensors", |
| "layers.35.ffn.experts.w1.biases": "model-00022-of-00026.safetensors", |
| "layers.35.ffn.experts.w1.scales": "model-00022-of-00026.safetensors", |
| "layers.35.ffn.experts.w1.weight": "model-00022-of-00026.safetensors", |
| "layers.35.ffn.experts.w2.biases": "model-00022-of-00026.safetensors", |
| "layers.35.ffn.experts.w2.scales": "model-00022-of-00026.safetensors", |
| "layers.35.ffn.experts.w2.weight": "model-00022-of-00026.safetensors", |
| "layers.35.ffn.experts.w3.biases": "model-00022-of-00026.safetensors", |
| "layers.35.ffn.experts.w3.scales": "model-00022-of-00026.safetensors", |
| "layers.35.ffn.experts.w3.weight": "model-00022-of-00026.safetensors", |
| "layers.35.ffn.gate.bias": "model-00021-of-00026.safetensors", |
| "layers.35.ffn.gate.weight": "model-00021-of-00026.safetensors", |
| "layers.35.ffn.shared_experts.w1.biases": "model-00022-of-00026.safetensors", |
| "layers.35.ffn.shared_experts.w1.scales": "model-00022-of-00026.safetensors", |
| "layers.35.ffn.shared_experts.w1.weight": "model-00022-of-00026.safetensors", |
| "layers.35.ffn.shared_experts.w2.biases": "model-00022-of-00026.safetensors", |
| "layers.35.ffn.shared_experts.w2.scales": "model-00022-of-00026.safetensors", |
| "layers.35.ffn.shared_experts.w2.weight": "model-00022-of-00026.safetensors", |
| "layers.35.ffn.shared_experts.w3.biases": "model-00022-of-00026.safetensors", |
| "layers.35.ffn.shared_experts.w3.scales": "model-00022-of-00026.safetensors", |
| "layers.35.ffn.shared_experts.w3.weight": "model-00022-of-00026.safetensors", |
| "layers.35.ffn_norm.weight": "model-00022-of-00026.safetensors", |
| "layers.35.hc_attn_base": "model-00022-of-00026.safetensors", |
| "layers.35.hc_attn_fn": "model-00022-of-00026.safetensors", |
| "layers.35.hc_attn_scale": "model-00022-of-00026.safetensors", |
| "layers.35.hc_ffn_base": "model-00022-of-00026.safetensors", |
| "layers.35.hc_ffn_fn": "model-00022-of-00026.safetensors", |
| "layers.35.hc_ffn_scale": "model-00022-of-00026.safetensors", |
| "layers.36.attn.attn_sink": "model-00022-of-00026.safetensors", |
| "layers.36.attn.compressor.ape": "model-00022-of-00026.safetensors", |
| "layers.36.attn.compressor.norm.weight": "model-00022-of-00026.safetensors", |
| "layers.36.attn.compressor.wgate.biases": "model-00022-of-00026.safetensors", |
| "layers.36.attn.compressor.wgate.scales": "model-00022-of-00026.safetensors", |
| "layers.36.attn.compressor.wgate.weight": "model-00022-of-00026.safetensors", |
| "layers.36.attn.compressor.wkv.biases": "model-00022-of-00026.safetensors", |
| "layers.36.attn.compressor.wkv.scales": "model-00022-of-00026.safetensors", |
| "layers.36.attn.compressor.wkv.weight": "model-00022-of-00026.safetensors", |
| "layers.36.attn.indexer.compressor.ape": "model-00022-of-00026.safetensors", |
| "layers.36.attn.indexer.compressor.norm.weight": "model-00022-of-00026.safetensors", |
| "layers.36.attn.indexer.compressor.wgate.biases": "model-00022-of-00026.safetensors", |
| "layers.36.attn.indexer.compressor.wgate.scales": "model-00022-of-00026.safetensors", |
| "layers.36.attn.indexer.compressor.wgate.weight": "model-00022-of-00026.safetensors", |
| "layers.36.attn.indexer.compressor.wkv.biases": "model-00022-of-00026.safetensors", |
| "layers.36.attn.indexer.compressor.wkv.scales": "model-00022-of-00026.safetensors", |
| "layers.36.attn.indexer.compressor.wkv.weight": "model-00022-of-00026.safetensors", |
| "layers.36.attn.indexer.weights_proj.biases": "model-00022-of-00026.safetensors", |
| "layers.36.attn.indexer.weights_proj.scales": "model-00022-of-00026.safetensors", |
| "layers.36.attn.indexer.weights_proj.weight": "model-00022-of-00026.safetensors", |
| "layers.36.attn.indexer.wq_b.biases": "model-00022-of-00026.safetensors", |
| "layers.36.attn.indexer.wq_b.scales": "model-00022-of-00026.safetensors", |
| "layers.36.attn.indexer.wq_b.weight": "model-00022-of-00026.safetensors", |
| "layers.36.attn.kv_norm.weight": "model-00022-of-00026.safetensors", |
| "layers.36.attn.q_norm.weight": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wkv.biases": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wkv.scales": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wkv.weight": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.0.biases": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.0.scales": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.0.weight": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.1.biases": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.1.scales": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.1.weight": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.2.biases": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.2.scales": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.2.weight": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.3.biases": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.3.scales": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.3.weight": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.4.biases": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.4.scales": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.4.weight": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.5.biases": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.5.scales": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.5.weight": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.6.biases": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.6.scales": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.6.weight": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.7.biases": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.7.scales": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_a.7.weight": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_b.biases": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_b.scales": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wo_b.weight": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wq_a.biases": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wq_a.scales": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wq_a.weight": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wq_b.biases": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wq_b.scales": "model-00022-of-00026.safetensors", |
| "layers.36.attn.wq_b.weight": "model-00022-of-00026.safetensors", |
| "layers.36.attn_norm.weight": "model-00023-of-00026.safetensors", |
| "layers.36.ffn.experts.w1.biases": "model-00022-of-00026.safetensors", |
| "layers.36.ffn.experts.w1.scales": "model-00022-of-00026.safetensors", |
| "layers.36.ffn.experts.w1.weight": "model-00022-of-00026.safetensors", |
| "layers.36.ffn.experts.w2.biases": "model-00022-of-00026.safetensors", |
| "layers.36.ffn.experts.w2.scales": "model-00022-of-00026.safetensors", |
| "layers.36.ffn.experts.w2.weight": "model-00022-of-00026.safetensors", |
| "layers.36.ffn.experts.w3.biases": "model-00023-of-00026.safetensors", |
| "layers.36.ffn.experts.w3.scales": "model-00023-of-00026.safetensors", |
| "layers.36.ffn.experts.w3.weight": "model-00023-of-00026.safetensors", |
| "layers.36.ffn.gate.bias": "model-00022-of-00026.safetensors", |
| "layers.36.ffn.gate.weight": "model-00022-of-00026.safetensors", |
| "layers.36.ffn.shared_experts.w1.biases": "model-00023-of-00026.safetensors", |
| "layers.36.ffn.shared_experts.w1.scales": "model-00023-of-00026.safetensors", |
| "layers.36.ffn.shared_experts.w1.weight": "model-00023-of-00026.safetensors", |
| "layers.36.ffn.shared_experts.w2.biases": "model-00023-of-00026.safetensors", |
| "layers.36.ffn.shared_experts.w2.scales": "model-00023-of-00026.safetensors", |
| "layers.36.ffn.shared_experts.w2.weight": "model-00023-of-00026.safetensors", |
| "layers.36.ffn.shared_experts.w3.biases": "model-00023-of-00026.safetensors", |
| "layers.36.ffn.shared_experts.w3.scales": "model-00023-of-00026.safetensors", |
| "layers.36.ffn.shared_experts.w3.weight": "model-00023-of-00026.safetensors", |
| "layers.36.ffn_norm.weight": "model-00023-of-00026.safetensors", |
| "layers.36.hc_attn_base": "model-00023-of-00026.safetensors", |
| "layers.36.hc_attn_fn": "model-00023-of-00026.safetensors", |
| "layers.36.hc_attn_scale": "model-00023-of-00026.safetensors", |
| "layers.36.hc_ffn_base": "model-00023-of-00026.safetensors", |
| "layers.36.hc_ffn_fn": "model-00023-of-00026.safetensors", |
| "layers.36.hc_ffn_scale": "model-00023-of-00026.safetensors", |
| "layers.37.attn.attn_sink": "model-00023-of-00026.safetensors", |
| "layers.37.attn.compressor.ape": "model-00023-of-00026.safetensors", |
| "layers.37.attn.compressor.norm.weight": "model-00023-of-00026.safetensors", |
| "layers.37.attn.compressor.wgate.biases": "model-00023-of-00026.safetensors", |
| "layers.37.attn.compressor.wgate.scales": "model-00023-of-00026.safetensors", |
| "layers.37.attn.compressor.wgate.weight": "model-00023-of-00026.safetensors", |
| "layers.37.attn.compressor.wkv.biases": "model-00023-of-00026.safetensors", |
| "layers.37.attn.compressor.wkv.scales": "model-00023-of-00026.safetensors", |
| "layers.37.attn.compressor.wkv.weight": "model-00023-of-00026.safetensors", |
| "layers.37.attn.kv_norm.weight": "model-00023-of-00026.safetensors", |
| "layers.37.attn.q_norm.weight": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wkv.biases": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wkv.scales": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wkv.weight": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.0.biases": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.0.scales": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.0.weight": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.1.biases": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.1.scales": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.1.weight": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.2.biases": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.2.scales": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.2.weight": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.3.biases": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.3.scales": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.3.weight": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.4.biases": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.4.scales": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.4.weight": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.5.biases": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.5.scales": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.5.weight": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.6.biases": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.6.scales": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.6.weight": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.7.biases": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.7.scales": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_a.7.weight": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_b.biases": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_b.scales": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wo_b.weight": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wq_a.biases": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wq_a.scales": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wq_a.weight": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wq_b.biases": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wq_b.scales": "model-00023-of-00026.safetensors", |
| "layers.37.attn.wq_b.weight": "model-00023-of-00026.safetensors", |
| "layers.37.attn_norm.weight": "model-00023-of-00026.safetensors", |
| "layers.37.ffn.experts.w1.biases": "model-00023-of-00026.safetensors", |
| "layers.37.ffn.experts.w1.scales": "model-00023-of-00026.safetensors", |
| "layers.37.ffn.experts.w1.weight": "model-00023-of-00026.safetensors", |
| "layers.37.ffn.experts.w2.biases": "model-00023-of-00026.safetensors", |
| "layers.37.ffn.experts.w2.scales": "model-00023-of-00026.safetensors", |
| "layers.37.ffn.experts.w2.weight": "model-00023-of-00026.safetensors", |
| "layers.37.ffn.experts.w3.biases": "model-00023-of-00026.safetensors", |
| "layers.37.ffn.experts.w3.scales": "model-00023-of-00026.safetensors", |
| "layers.37.ffn.experts.w3.weight": "model-00023-of-00026.safetensors", |
| "layers.37.ffn.gate.bias": "model-00023-of-00026.safetensors", |
| "layers.37.ffn.gate.weight": "model-00023-of-00026.safetensors", |
| "layers.37.ffn.shared_experts.w1.biases": "model-00023-of-00026.safetensors", |
| "layers.37.ffn.shared_experts.w1.scales": "model-00023-of-00026.safetensors", |
| "layers.37.ffn.shared_experts.w1.weight": "model-00023-of-00026.safetensors", |
| "layers.37.ffn.shared_experts.w2.biases": "model-00023-of-00026.safetensors", |
| "layers.37.ffn.shared_experts.w2.scales": "model-00023-of-00026.safetensors", |
| "layers.37.ffn.shared_experts.w2.weight": "model-00023-of-00026.safetensors", |
| "layers.37.ffn.shared_experts.w3.biases": "model-00023-of-00026.safetensors", |
| "layers.37.ffn.shared_experts.w3.scales": "model-00023-of-00026.safetensors", |
| "layers.37.ffn.shared_experts.w3.weight": "model-00023-of-00026.safetensors", |
| "layers.37.ffn_norm.weight": "model-00023-of-00026.safetensors", |
| "layers.37.hc_attn_base": "model-00023-of-00026.safetensors", |
| "layers.37.hc_attn_fn": "model-00023-of-00026.safetensors", |
| "layers.37.hc_attn_scale": "model-00023-of-00026.safetensors", |
| "layers.37.hc_ffn_base": "model-00023-of-00026.safetensors", |
| "layers.37.hc_ffn_fn": "model-00023-of-00026.safetensors", |
| "layers.37.hc_ffn_scale": "model-00023-of-00026.safetensors", |
| "layers.38.attn.attn_sink": "model-00023-of-00026.safetensors", |
| "layers.38.attn.compressor.ape": "model-00023-of-00026.safetensors", |
| "layers.38.attn.compressor.norm.weight": "model-00023-of-00026.safetensors", |
| "layers.38.attn.compressor.wgate.biases": "model-00023-of-00026.safetensors", |
| "layers.38.attn.compressor.wgate.scales": "model-00023-of-00026.safetensors", |
| "layers.38.attn.compressor.wgate.weight": "model-00023-of-00026.safetensors", |
| "layers.38.attn.compressor.wkv.biases": "model-00023-of-00026.safetensors", |
| "layers.38.attn.compressor.wkv.scales": "model-00023-of-00026.safetensors", |
| "layers.38.attn.compressor.wkv.weight": "model-00023-of-00026.safetensors", |
| "layers.38.attn.indexer.compressor.ape": "model-00023-of-00026.safetensors", |
| "layers.38.attn.indexer.compressor.norm.weight": "model-00023-of-00026.safetensors", |
| "layers.38.attn.indexer.compressor.wgate.biases": "model-00023-of-00026.safetensors", |
| "layers.38.attn.indexer.compressor.wgate.scales": "model-00023-of-00026.safetensors", |
| "layers.38.attn.indexer.compressor.wgate.weight": "model-00023-of-00026.safetensors", |
| "layers.38.attn.indexer.compressor.wkv.biases": "model-00023-of-00026.safetensors", |
| "layers.38.attn.indexer.compressor.wkv.scales": "model-00023-of-00026.safetensors", |
| "layers.38.attn.indexer.compressor.wkv.weight": "model-00023-of-00026.safetensors", |
| "layers.38.attn.indexer.weights_proj.biases": "model-00023-of-00026.safetensors", |
| "layers.38.attn.indexer.weights_proj.scales": "model-00023-of-00026.safetensors", |
| "layers.38.attn.indexer.weights_proj.weight": "model-00023-of-00026.safetensors", |
| "layers.38.attn.indexer.wq_b.biases": "model-00023-of-00026.safetensors", |
| "layers.38.attn.indexer.wq_b.scales": "model-00023-of-00026.safetensors", |
| "layers.38.attn.indexer.wq_b.weight": "model-00023-of-00026.safetensors", |
| "layers.38.attn.kv_norm.weight": "model-00023-of-00026.safetensors", |
| "layers.38.attn.q_norm.weight": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wkv.biases": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wkv.scales": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wkv.weight": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.0.biases": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.0.scales": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.0.weight": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.1.biases": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.1.scales": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.1.weight": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.2.biases": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.2.scales": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.2.weight": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.3.biases": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.3.scales": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.3.weight": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.4.biases": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.4.scales": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.4.weight": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.5.biases": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.5.scales": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.5.weight": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.6.biases": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.6.scales": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.6.weight": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.7.biases": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.7.scales": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_a.7.weight": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_b.biases": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_b.scales": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wo_b.weight": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wq_a.biases": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wq_a.scales": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wq_a.weight": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wq_b.biases": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wq_b.scales": "model-00023-of-00026.safetensors", |
| "layers.38.attn.wq_b.weight": "model-00023-of-00026.safetensors", |
| "layers.38.attn_norm.weight": "model-00024-of-00026.safetensors", |
| "layers.38.ffn.experts.w1.biases": "model-00023-of-00026.safetensors", |
| "layers.38.ffn.experts.w1.scales": "model-00023-of-00026.safetensors", |
| "layers.38.ffn.experts.w1.weight": "model-00023-of-00026.safetensors", |
| "layers.38.ffn.experts.w2.biases": "model-00024-of-00026.safetensors", |
| "layers.38.ffn.experts.w2.scales": "model-00024-of-00026.safetensors", |
| "layers.38.ffn.experts.w2.weight": "model-00024-of-00026.safetensors", |
| "layers.38.ffn.experts.w3.biases": "model-00024-of-00026.safetensors", |
| "layers.38.ffn.experts.w3.scales": "model-00024-of-00026.safetensors", |
| "layers.38.ffn.experts.w3.weight": "model-00024-of-00026.safetensors", |
| "layers.38.ffn.gate.bias": "model-00023-of-00026.safetensors", |
| "layers.38.ffn.gate.weight": "model-00023-of-00026.safetensors", |
| "layers.38.ffn.shared_experts.w1.biases": "model-00024-of-00026.safetensors", |
| "layers.38.ffn.shared_experts.w1.scales": "model-00024-of-00026.safetensors", |
| "layers.38.ffn.shared_experts.w1.weight": "model-00024-of-00026.safetensors", |
| "layers.38.ffn.shared_experts.w2.biases": "model-00024-of-00026.safetensors", |
| "layers.38.ffn.shared_experts.w2.scales": "model-00024-of-00026.safetensors", |
| "layers.38.ffn.shared_experts.w2.weight": "model-00024-of-00026.safetensors", |
| "layers.38.ffn.shared_experts.w3.biases": "model-00024-of-00026.safetensors", |
| "layers.38.ffn.shared_experts.w3.scales": "model-00024-of-00026.safetensors", |
| "layers.38.ffn.shared_experts.w3.weight": "model-00024-of-00026.safetensors", |
| "layers.38.ffn_norm.weight": "model-00024-of-00026.safetensors", |
| "layers.38.hc_attn_base": "model-00024-of-00026.safetensors", |
| "layers.38.hc_attn_fn": "model-00024-of-00026.safetensors", |
| "layers.38.hc_attn_scale": "model-00024-of-00026.safetensors", |
| "layers.38.hc_ffn_base": "model-00024-of-00026.safetensors", |
| "layers.38.hc_ffn_fn": "model-00024-of-00026.safetensors", |
| "layers.38.hc_ffn_scale": "model-00024-of-00026.safetensors", |
| "layers.39.attn.attn_sink": "model-00024-of-00026.safetensors", |
| "layers.39.attn.compressor.ape": "model-00024-of-00026.safetensors", |
| "layers.39.attn.compressor.norm.weight": "model-00024-of-00026.safetensors", |
| "layers.39.attn.compressor.wgate.biases": "model-00024-of-00026.safetensors", |
| "layers.39.attn.compressor.wgate.scales": "model-00024-of-00026.safetensors", |
| "layers.39.attn.compressor.wgate.weight": "model-00024-of-00026.safetensors", |
| "layers.39.attn.compressor.wkv.biases": "model-00024-of-00026.safetensors", |
| "layers.39.attn.compressor.wkv.scales": "model-00024-of-00026.safetensors", |
| "layers.39.attn.compressor.wkv.weight": "model-00024-of-00026.safetensors", |
| "layers.39.attn.kv_norm.weight": "model-00024-of-00026.safetensors", |
| "layers.39.attn.q_norm.weight": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wkv.biases": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wkv.scales": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wkv.weight": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.0.biases": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.0.scales": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.0.weight": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.1.biases": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.1.scales": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.1.weight": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.2.biases": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.2.scales": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.2.weight": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.3.biases": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.3.scales": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.3.weight": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.4.biases": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.4.scales": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.4.weight": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.5.biases": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.5.scales": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.5.weight": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.6.biases": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.6.scales": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.6.weight": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.7.biases": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.7.scales": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_a.7.weight": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_b.biases": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_b.scales": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wo_b.weight": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wq_a.biases": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wq_a.scales": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wq_a.weight": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wq_b.biases": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wq_b.scales": "model-00024-of-00026.safetensors", |
| "layers.39.attn.wq_b.weight": "model-00024-of-00026.safetensors", |
| "layers.39.attn_norm.weight": "model-00024-of-00026.safetensors", |
| "layers.39.ffn.experts.w1.biases": "model-00024-of-00026.safetensors", |
| "layers.39.ffn.experts.w1.scales": "model-00024-of-00026.safetensors", |
| "layers.39.ffn.experts.w1.weight": "model-00024-of-00026.safetensors", |
| "layers.39.ffn.experts.w2.biases": "model-00024-of-00026.safetensors", |
| "layers.39.ffn.experts.w2.scales": "model-00024-of-00026.safetensors", |
| "layers.39.ffn.experts.w2.weight": "model-00024-of-00026.safetensors", |
| "layers.39.ffn.experts.w3.biases": "model-00024-of-00026.safetensors", |
| "layers.39.ffn.experts.w3.scales": "model-00024-of-00026.safetensors", |
| "layers.39.ffn.experts.w3.weight": "model-00024-of-00026.safetensors", |
| "layers.39.ffn.gate.bias": "model-00024-of-00026.safetensors", |
| "layers.39.ffn.gate.weight": "model-00024-of-00026.safetensors", |
| "layers.39.ffn.shared_experts.w1.biases": "model-00024-of-00026.safetensors", |
| "layers.39.ffn.shared_experts.w1.scales": "model-00024-of-00026.safetensors", |
| "layers.39.ffn.shared_experts.w1.weight": "model-00024-of-00026.safetensors", |
| "layers.39.ffn.shared_experts.w2.biases": "model-00024-of-00026.safetensors", |
| "layers.39.ffn.shared_experts.w2.scales": "model-00024-of-00026.safetensors", |
| "layers.39.ffn.shared_experts.w2.weight": "model-00024-of-00026.safetensors", |
| "layers.39.ffn.shared_experts.w3.biases": "model-00024-of-00026.safetensors", |
| "layers.39.ffn.shared_experts.w3.scales": "model-00024-of-00026.safetensors", |
| "layers.39.ffn.shared_experts.w3.weight": "model-00024-of-00026.safetensors", |
| "layers.39.ffn_norm.weight": "model-00024-of-00026.safetensors", |
| "layers.39.hc_attn_base": "model-00024-of-00026.safetensors", |
| "layers.39.hc_attn_fn": "model-00024-of-00026.safetensors", |
| "layers.39.hc_attn_scale": "model-00024-of-00026.safetensors", |
| "layers.39.hc_ffn_base": "model-00024-of-00026.safetensors", |
| "layers.39.hc_ffn_fn": "model-00024-of-00026.safetensors", |
| "layers.39.hc_ffn_scale": "model-00024-of-00026.safetensors", |
| "layers.4.attn.attn_sink": "model-00003-of-00026.safetensors", |
| "layers.4.attn.compressor.ape": "model-00003-of-00026.safetensors", |
| "layers.4.attn.compressor.norm.weight": "model-00003-of-00026.safetensors", |
| "layers.4.attn.compressor.wgate.biases": "model-00003-of-00026.safetensors", |
| "layers.4.attn.compressor.wgate.scales": "model-00003-of-00026.safetensors", |
| "layers.4.attn.compressor.wgate.weight": "model-00003-of-00026.safetensors", |
| "layers.4.attn.compressor.wkv.biases": "model-00003-of-00026.safetensors", |
| "layers.4.attn.compressor.wkv.scales": "model-00003-of-00026.safetensors", |
| "layers.4.attn.compressor.wkv.weight": "model-00003-of-00026.safetensors", |
| "layers.4.attn.indexer.compressor.ape": "model-00003-of-00026.safetensors", |
| "layers.4.attn.indexer.compressor.norm.weight": "model-00003-of-00026.safetensors", |
| "layers.4.attn.indexer.compressor.wgate.biases": "model-00003-of-00026.safetensors", |
| "layers.4.attn.indexer.compressor.wgate.scales": "model-00003-of-00026.safetensors", |
| "layers.4.attn.indexer.compressor.wgate.weight": "model-00003-of-00026.safetensors", |
| "layers.4.attn.indexer.compressor.wkv.biases": "model-00003-of-00026.safetensors", |
| "layers.4.attn.indexer.compressor.wkv.scales": "model-00003-of-00026.safetensors", |
| "layers.4.attn.indexer.compressor.wkv.weight": "model-00003-of-00026.safetensors", |
| "layers.4.attn.indexer.weights_proj.biases": "model-00003-of-00026.safetensors", |
| "layers.4.attn.indexer.weights_proj.scales": "model-00003-of-00026.safetensors", |
| "layers.4.attn.indexer.weights_proj.weight": "model-00003-of-00026.safetensors", |
| "layers.4.attn.indexer.wq_b.biases": "model-00003-of-00026.safetensors", |
| "layers.4.attn.indexer.wq_b.scales": "model-00003-of-00026.safetensors", |
| "layers.4.attn.indexer.wq_b.weight": "model-00003-of-00026.safetensors", |
| "layers.4.attn.kv_norm.weight": "model-00003-of-00026.safetensors", |
| "layers.4.attn.q_norm.weight": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wkv.biases": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wkv.scales": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wkv.weight": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.0.biases": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.0.scales": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.0.weight": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.1.biases": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.1.scales": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.1.weight": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.2.biases": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.2.scales": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.2.weight": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.3.biases": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.3.scales": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.3.weight": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.4.biases": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.4.scales": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.4.weight": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.5.biases": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.5.scales": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.5.weight": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.6.biases": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.6.scales": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.6.weight": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.7.biases": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.7.scales": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_a.7.weight": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_b.biases": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_b.scales": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wo_b.weight": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wq_a.biases": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wq_a.scales": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wq_a.weight": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wq_b.biases": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wq_b.scales": "model-00003-of-00026.safetensors", |
| "layers.4.attn.wq_b.weight": "model-00003-of-00026.safetensors", |
| "layers.4.attn_norm.weight": "model-00003-of-00026.safetensors", |
| "layers.4.ffn.experts.w1.biases": "model-00003-of-00026.safetensors", |
| "layers.4.ffn.experts.w1.scales": "model-00003-of-00026.safetensors", |
| "layers.4.ffn.experts.w1.weight": "model-00003-of-00026.safetensors", |
| "layers.4.ffn.experts.w2.biases": "model-00003-of-00026.safetensors", |
| "layers.4.ffn.experts.w2.scales": "model-00003-of-00026.safetensors", |
| "layers.4.ffn.experts.w2.weight": "model-00003-of-00026.safetensors", |
| "layers.4.ffn.experts.w3.biases": "model-00003-of-00026.safetensors", |
| "layers.4.ffn.experts.w3.scales": "model-00003-of-00026.safetensors", |
| "layers.4.ffn.experts.w3.weight": "model-00003-of-00026.safetensors", |
| "layers.4.ffn.gate.bias": "model-00003-of-00026.safetensors", |
| "layers.4.ffn.gate.weight": "model-00003-of-00026.safetensors", |
| "layers.4.ffn.shared_experts.w1.biases": "model-00003-of-00026.safetensors", |
| "layers.4.ffn.shared_experts.w1.scales": "model-00003-of-00026.safetensors", |
| "layers.4.ffn.shared_experts.w1.weight": "model-00003-of-00026.safetensors", |
| "layers.4.ffn.shared_experts.w2.biases": "model-00003-of-00026.safetensors", |
| "layers.4.ffn.shared_experts.w2.scales": "model-00003-of-00026.safetensors", |
| "layers.4.ffn.shared_experts.w2.weight": "model-00003-of-00026.safetensors", |
| "layers.4.ffn.shared_experts.w3.biases": "model-00003-of-00026.safetensors", |
| "layers.4.ffn.shared_experts.w3.scales": "model-00003-of-00026.safetensors", |
| "layers.4.ffn.shared_experts.w3.weight": "model-00003-of-00026.safetensors", |
| "layers.4.ffn_norm.weight": "model-00003-of-00026.safetensors", |
| "layers.4.hc_attn_base": "model-00003-of-00026.safetensors", |
| "layers.4.hc_attn_fn": "model-00003-of-00026.safetensors", |
| "layers.4.hc_attn_scale": "model-00003-of-00026.safetensors", |
| "layers.4.hc_ffn_base": "model-00003-of-00026.safetensors", |
| "layers.4.hc_ffn_fn": "model-00003-of-00026.safetensors", |
| "layers.4.hc_ffn_scale": "model-00003-of-00026.safetensors", |
| "layers.40.attn.attn_sink": "model-00024-of-00026.safetensors", |
| "layers.40.attn.compressor.ape": "model-00024-of-00026.safetensors", |
| "layers.40.attn.compressor.norm.weight": "model-00024-of-00026.safetensors", |
| "layers.40.attn.compressor.wgate.biases": "model-00024-of-00026.safetensors", |
| "layers.40.attn.compressor.wgate.scales": "model-00024-of-00026.safetensors", |
| "layers.40.attn.compressor.wgate.weight": "model-00024-of-00026.safetensors", |
| "layers.40.attn.compressor.wkv.biases": "model-00024-of-00026.safetensors", |
| "layers.40.attn.compressor.wkv.scales": "model-00024-of-00026.safetensors", |
| "layers.40.attn.compressor.wkv.weight": "model-00024-of-00026.safetensors", |
| "layers.40.attn.indexer.compressor.ape": "model-00024-of-00026.safetensors", |
| "layers.40.attn.indexer.compressor.norm.weight": "model-00024-of-00026.safetensors", |
| "layers.40.attn.indexer.compressor.wgate.biases": "model-00024-of-00026.safetensors", |
| "layers.40.attn.indexer.compressor.wgate.scales": "model-00024-of-00026.safetensors", |
| "layers.40.attn.indexer.compressor.wgate.weight": "model-00024-of-00026.safetensors", |
| "layers.40.attn.indexer.compressor.wkv.biases": "model-00024-of-00026.safetensors", |
| "layers.40.attn.indexer.compressor.wkv.scales": "model-00024-of-00026.safetensors", |
| "layers.40.attn.indexer.compressor.wkv.weight": "model-00024-of-00026.safetensors", |
| "layers.40.attn.indexer.weights_proj.biases": "model-00024-of-00026.safetensors", |
| "layers.40.attn.indexer.weights_proj.scales": "model-00024-of-00026.safetensors", |
| "layers.40.attn.indexer.weights_proj.weight": "model-00024-of-00026.safetensors", |
| "layers.40.attn.indexer.wq_b.biases": "model-00024-of-00026.safetensors", |
| "layers.40.attn.indexer.wq_b.scales": "model-00024-of-00026.safetensors", |
| "layers.40.attn.indexer.wq_b.weight": "model-00024-of-00026.safetensors", |
| "layers.40.attn.kv_norm.weight": "model-00024-of-00026.safetensors", |
| "layers.40.attn.q_norm.weight": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wkv.biases": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wkv.scales": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wkv.weight": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.0.biases": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.0.scales": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.0.weight": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.1.biases": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.1.scales": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.1.weight": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.2.biases": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.2.scales": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.2.weight": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.3.biases": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.3.scales": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.3.weight": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.4.biases": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.4.scales": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.4.weight": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.5.biases": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.5.scales": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.5.weight": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.6.biases": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.6.scales": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.6.weight": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.7.biases": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.7.scales": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_a.7.weight": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_b.biases": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_b.scales": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wo_b.weight": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wq_a.biases": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wq_a.scales": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wq_a.weight": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wq_b.biases": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wq_b.scales": "model-00024-of-00026.safetensors", |
| "layers.40.attn.wq_b.weight": "model-00024-of-00026.safetensors", |
| "layers.40.attn_norm.weight": "model-00025-of-00026.safetensors", |
| "layers.40.ffn.experts.w1.biases": "model-00025-of-00026.safetensors", |
| "layers.40.ffn.experts.w1.scales": "model-00025-of-00026.safetensors", |
| "layers.40.ffn.experts.w1.weight": "model-00025-of-00026.safetensors", |
| "layers.40.ffn.experts.w2.biases": "model-00025-of-00026.safetensors", |
| "layers.40.ffn.experts.w2.scales": "model-00025-of-00026.safetensors", |
| "layers.40.ffn.experts.w2.weight": "model-00025-of-00026.safetensors", |
| "layers.40.ffn.experts.w3.biases": "model-00025-of-00026.safetensors", |
| "layers.40.ffn.experts.w3.scales": "model-00025-of-00026.safetensors", |
| "layers.40.ffn.experts.w3.weight": "model-00025-of-00026.safetensors", |
| "layers.40.ffn.gate.bias": "model-00024-of-00026.safetensors", |
| "layers.40.ffn.gate.weight": "model-00024-of-00026.safetensors", |
| "layers.40.ffn.shared_experts.w1.biases": "model-00025-of-00026.safetensors", |
| "layers.40.ffn.shared_experts.w1.scales": "model-00025-of-00026.safetensors", |
| "layers.40.ffn.shared_experts.w1.weight": "model-00025-of-00026.safetensors", |
| "layers.40.ffn.shared_experts.w2.biases": "model-00025-of-00026.safetensors", |
| "layers.40.ffn.shared_experts.w2.scales": "model-00025-of-00026.safetensors", |
| "layers.40.ffn.shared_experts.w2.weight": "model-00025-of-00026.safetensors", |
| "layers.40.ffn.shared_experts.w3.biases": "model-00025-of-00026.safetensors", |
| "layers.40.ffn.shared_experts.w3.scales": "model-00025-of-00026.safetensors", |
| "layers.40.ffn.shared_experts.w3.weight": "model-00025-of-00026.safetensors", |
| "layers.40.ffn_norm.weight": "model-00025-of-00026.safetensors", |
| "layers.40.hc_attn_base": "model-00025-of-00026.safetensors", |
| "layers.40.hc_attn_fn": "model-00025-of-00026.safetensors", |
| "layers.40.hc_attn_scale": "model-00025-of-00026.safetensors", |
| "layers.40.hc_ffn_base": "model-00025-of-00026.safetensors", |
| "layers.40.hc_ffn_fn": "model-00025-of-00026.safetensors", |
| "layers.40.hc_ffn_scale": "model-00025-of-00026.safetensors", |
| "layers.41.attn.attn_sink": "model-00025-of-00026.safetensors", |
| "layers.41.attn.compressor.ape": "model-00025-of-00026.safetensors", |
| "layers.41.attn.compressor.norm.weight": "model-00025-of-00026.safetensors", |
| "layers.41.attn.compressor.wgate.biases": "model-00025-of-00026.safetensors", |
| "layers.41.attn.compressor.wgate.scales": "model-00025-of-00026.safetensors", |
| "layers.41.attn.compressor.wgate.weight": "model-00025-of-00026.safetensors", |
| "layers.41.attn.compressor.wkv.biases": "model-00025-of-00026.safetensors", |
| "layers.41.attn.compressor.wkv.scales": "model-00025-of-00026.safetensors", |
| "layers.41.attn.compressor.wkv.weight": "model-00025-of-00026.safetensors", |
| "layers.41.attn.kv_norm.weight": "model-00025-of-00026.safetensors", |
| "layers.41.attn.q_norm.weight": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wkv.biases": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wkv.scales": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wkv.weight": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.0.biases": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.0.scales": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.0.weight": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.1.biases": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.1.scales": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.1.weight": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.2.biases": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.2.scales": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.2.weight": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.3.biases": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.3.scales": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.3.weight": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.4.biases": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.4.scales": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.4.weight": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.5.biases": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.5.scales": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.5.weight": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.6.biases": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.6.scales": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.6.weight": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.7.biases": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.7.scales": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_a.7.weight": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_b.biases": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_b.scales": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wo_b.weight": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wq_a.biases": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wq_a.scales": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wq_a.weight": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wq_b.biases": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wq_b.scales": "model-00025-of-00026.safetensors", |
| "layers.41.attn.wq_b.weight": "model-00025-of-00026.safetensors", |
| "layers.41.attn_norm.weight": "model-00026-of-00026.safetensors", |
| "layers.41.ffn.experts.w1.biases": "model-00025-of-00026.safetensors", |
| "layers.41.ffn.experts.w1.scales": "model-00025-of-00026.safetensors", |
| "layers.41.ffn.experts.w1.weight": "model-00025-of-00026.safetensors", |
| "layers.41.ffn.experts.w2.biases": "model-00025-of-00026.safetensors", |
| "layers.41.ffn.experts.w2.scales": "model-00025-of-00026.safetensors", |
| "layers.41.ffn.experts.w2.weight": "model-00025-of-00026.safetensors", |
| "layers.41.ffn.experts.w3.biases": "model-00026-of-00026.safetensors", |
| "layers.41.ffn.experts.w3.scales": "model-00026-of-00026.safetensors", |
| "layers.41.ffn.experts.w3.weight": "model-00026-of-00026.safetensors", |
| "layers.41.ffn.gate.bias": "model-00025-of-00026.safetensors", |
| "layers.41.ffn.gate.weight": "model-00025-of-00026.safetensors", |
| "layers.41.ffn.shared_experts.w1.biases": "model-00026-of-00026.safetensors", |
| "layers.41.ffn.shared_experts.w1.scales": "model-00026-of-00026.safetensors", |
| "layers.41.ffn.shared_experts.w1.weight": "model-00026-of-00026.safetensors", |
| "layers.41.ffn.shared_experts.w2.biases": "model-00026-of-00026.safetensors", |
| "layers.41.ffn.shared_experts.w2.scales": "model-00026-of-00026.safetensors", |
| "layers.41.ffn.shared_experts.w2.weight": "model-00026-of-00026.safetensors", |
| "layers.41.ffn.shared_experts.w3.biases": "model-00026-of-00026.safetensors", |
| "layers.41.ffn.shared_experts.w3.scales": "model-00026-of-00026.safetensors", |
| "layers.41.ffn.shared_experts.w3.weight": "model-00026-of-00026.safetensors", |
| "layers.41.ffn_norm.weight": "model-00026-of-00026.safetensors", |
| "layers.41.hc_attn_base": "model-00026-of-00026.safetensors", |
| "layers.41.hc_attn_fn": "model-00026-of-00026.safetensors", |
| "layers.41.hc_attn_scale": "model-00026-of-00026.safetensors", |
| "layers.41.hc_ffn_base": "model-00026-of-00026.safetensors", |
| "layers.41.hc_ffn_fn": "model-00026-of-00026.safetensors", |
| "layers.41.hc_ffn_scale": "model-00026-of-00026.safetensors", |
| "layers.42.attn.attn_sink": "model-00026-of-00026.safetensors", |
| "layers.42.attn.compressor.ape": "model-00026-of-00026.safetensors", |
| "layers.42.attn.compressor.norm.weight": "model-00026-of-00026.safetensors", |
| "layers.42.attn.compressor.wgate.biases": "model-00026-of-00026.safetensors", |
| "layers.42.attn.compressor.wgate.scales": "model-00026-of-00026.safetensors", |
| "layers.42.attn.compressor.wgate.weight": "model-00026-of-00026.safetensors", |
| "layers.42.attn.compressor.wkv.biases": "model-00026-of-00026.safetensors", |
| "layers.42.attn.compressor.wkv.scales": "model-00026-of-00026.safetensors", |
| "layers.42.attn.compressor.wkv.weight": "model-00026-of-00026.safetensors", |
| "layers.42.attn.indexer.compressor.ape": "model-00026-of-00026.safetensors", |
| "layers.42.attn.indexer.compressor.norm.weight": "model-00026-of-00026.safetensors", |
| "layers.42.attn.indexer.compressor.wgate.biases": "model-00026-of-00026.safetensors", |
| "layers.42.attn.indexer.compressor.wgate.scales": "model-00026-of-00026.safetensors", |
| "layers.42.attn.indexer.compressor.wgate.weight": "model-00026-of-00026.safetensors", |
| "layers.42.attn.indexer.compressor.wkv.biases": "model-00026-of-00026.safetensors", |
| "layers.42.attn.indexer.compressor.wkv.scales": "model-00026-of-00026.safetensors", |
| "layers.42.attn.indexer.compressor.wkv.weight": "model-00026-of-00026.safetensors", |
| "layers.42.attn.indexer.weights_proj.biases": "model-00026-of-00026.safetensors", |
| "layers.42.attn.indexer.weights_proj.scales": "model-00026-of-00026.safetensors", |
| "layers.42.attn.indexer.weights_proj.weight": "model-00026-of-00026.safetensors", |
| "layers.42.attn.indexer.wq_b.biases": "model-00026-of-00026.safetensors", |
| "layers.42.attn.indexer.wq_b.scales": "model-00026-of-00026.safetensors", |
| "layers.42.attn.indexer.wq_b.weight": "model-00026-of-00026.safetensors", |
| "layers.42.attn.kv_norm.weight": "model-00026-of-00026.safetensors", |
| "layers.42.attn.q_norm.weight": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wkv.biases": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wkv.scales": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wkv.weight": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.0.biases": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.0.scales": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.0.weight": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.1.biases": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.1.scales": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.1.weight": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.2.biases": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.2.scales": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.2.weight": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.3.biases": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.3.scales": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.3.weight": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.4.biases": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.4.scales": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.4.weight": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.5.biases": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.5.scales": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.5.weight": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.6.biases": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.6.scales": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.6.weight": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.7.biases": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.7.scales": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_a.7.weight": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_b.biases": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_b.scales": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wo_b.weight": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wq_a.biases": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wq_a.scales": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wq_a.weight": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wq_b.biases": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wq_b.scales": "model-00026-of-00026.safetensors", |
| "layers.42.attn.wq_b.weight": "model-00026-of-00026.safetensors", |
| "layers.42.attn_norm.weight": "model-00026-of-00026.safetensors", |
| "layers.42.ffn.experts.w1.biases": "model-00026-of-00026.safetensors", |
| "layers.42.ffn.experts.w1.scales": "model-00026-of-00026.safetensors", |
| "layers.42.ffn.experts.w1.weight": "model-00026-of-00026.safetensors", |
| "layers.42.ffn.experts.w2.biases": "model-00026-of-00026.safetensors", |
| "layers.42.ffn.experts.w2.scales": "model-00026-of-00026.safetensors", |
| "layers.42.ffn.experts.w2.weight": "model-00026-of-00026.safetensors", |
| "layers.42.ffn.experts.w3.biases": "model-00026-of-00026.safetensors", |
| "layers.42.ffn.experts.w3.scales": "model-00026-of-00026.safetensors", |
| "layers.42.ffn.experts.w3.weight": "model-00026-of-00026.safetensors", |
| "layers.42.ffn.gate.bias": "model-00026-of-00026.safetensors", |
| "layers.42.ffn.gate.weight": "model-00026-of-00026.safetensors", |
| "layers.42.ffn.shared_experts.w1.biases": "model-00026-of-00026.safetensors", |
| "layers.42.ffn.shared_experts.w1.scales": "model-00026-of-00026.safetensors", |
| "layers.42.ffn.shared_experts.w1.weight": "model-00026-of-00026.safetensors", |
| "layers.42.ffn.shared_experts.w2.biases": "model-00026-of-00026.safetensors", |
| "layers.42.ffn.shared_experts.w2.scales": "model-00026-of-00026.safetensors", |
| "layers.42.ffn.shared_experts.w2.weight": "model-00026-of-00026.safetensors", |
| "layers.42.ffn.shared_experts.w3.biases": "model-00026-of-00026.safetensors", |
| "layers.42.ffn.shared_experts.w3.scales": "model-00026-of-00026.safetensors", |
| "layers.42.ffn.shared_experts.w3.weight": "model-00026-of-00026.safetensors", |
| "layers.42.ffn_norm.weight": "model-00026-of-00026.safetensors", |
| "layers.42.hc_attn_base": "model-00026-of-00026.safetensors", |
| "layers.42.hc_attn_fn": "model-00026-of-00026.safetensors", |
| "layers.42.hc_attn_scale": "model-00026-of-00026.safetensors", |
| "layers.42.hc_ffn_base": "model-00026-of-00026.safetensors", |
| "layers.42.hc_ffn_fn": "model-00026-of-00026.safetensors", |
| "layers.42.hc_ffn_scale": "model-00026-of-00026.safetensors", |
| "layers.5.attn.attn_sink": "model-00003-of-00026.safetensors", |
| "layers.5.attn.compressor.ape": "model-00003-of-00026.safetensors", |
| "layers.5.attn.compressor.norm.weight": "model-00003-of-00026.safetensors", |
| "layers.5.attn.compressor.wgate.biases": "model-00003-of-00026.safetensors", |
| "layers.5.attn.compressor.wgate.scales": "model-00003-of-00026.safetensors", |
| "layers.5.attn.compressor.wgate.weight": "model-00003-of-00026.safetensors", |
| "layers.5.attn.compressor.wkv.biases": "model-00003-of-00026.safetensors", |
| "layers.5.attn.compressor.wkv.scales": "model-00003-of-00026.safetensors", |
| "layers.5.attn.compressor.wkv.weight": "model-00003-of-00026.safetensors", |
| "layers.5.attn.kv_norm.weight": "model-00003-of-00026.safetensors", |
| "layers.5.attn.q_norm.weight": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wkv.biases": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wkv.scales": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wkv.weight": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.0.biases": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.0.scales": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.0.weight": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.1.biases": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.1.scales": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.1.weight": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.2.biases": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.2.scales": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.2.weight": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.3.biases": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.3.scales": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.3.weight": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.4.biases": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.4.scales": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.4.weight": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.5.biases": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.5.scales": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.5.weight": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.6.biases": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.6.scales": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.6.weight": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.7.biases": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.7.scales": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_a.7.weight": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_b.biases": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_b.scales": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wo_b.weight": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wq_a.biases": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wq_a.scales": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wq_a.weight": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wq_b.biases": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wq_b.scales": "model-00003-of-00026.safetensors", |
| "layers.5.attn.wq_b.weight": "model-00003-of-00026.safetensors", |
| "layers.5.attn_norm.weight": "model-00004-of-00026.safetensors", |
| "layers.5.ffn.experts.w1.biases": "model-00004-of-00026.safetensors", |
| "layers.5.ffn.experts.w1.scales": "model-00004-of-00026.safetensors", |
| "layers.5.ffn.experts.w1.weight": "model-00004-of-00026.safetensors", |
| "layers.5.ffn.experts.w2.biases": "model-00004-of-00026.safetensors", |
| "layers.5.ffn.experts.w2.scales": "model-00004-of-00026.safetensors", |
| "layers.5.ffn.experts.w2.weight": "model-00004-of-00026.safetensors", |
| "layers.5.ffn.experts.w3.biases": "model-00004-of-00026.safetensors", |
| "layers.5.ffn.experts.w3.scales": "model-00004-of-00026.safetensors", |
| "layers.5.ffn.experts.w3.weight": "model-00004-of-00026.safetensors", |
| "layers.5.ffn.gate.bias": "model-00003-of-00026.safetensors", |
| "layers.5.ffn.gate.weight": "model-00003-of-00026.safetensors", |
| "layers.5.ffn.shared_experts.w1.biases": "model-00004-of-00026.safetensors", |
| "layers.5.ffn.shared_experts.w1.scales": "model-00004-of-00026.safetensors", |
| "layers.5.ffn.shared_experts.w1.weight": "model-00004-of-00026.safetensors", |
| "layers.5.ffn.shared_experts.w2.biases": "model-00004-of-00026.safetensors", |
| "layers.5.ffn.shared_experts.w2.scales": "model-00004-of-00026.safetensors", |
| "layers.5.ffn.shared_experts.w2.weight": "model-00004-of-00026.safetensors", |
| "layers.5.ffn.shared_experts.w3.biases": "model-00004-of-00026.safetensors", |
| "layers.5.ffn.shared_experts.w3.scales": "model-00004-of-00026.safetensors", |
| "layers.5.ffn.shared_experts.w3.weight": "model-00004-of-00026.safetensors", |
| "layers.5.ffn_norm.weight": "model-00004-of-00026.safetensors", |
| "layers.5.hc_attn_base": "model-00004-of-00026.safetensors", |
| "layers.5.hc_attn_fn": "model-00004-of-00026.safetensors", |
| "layers.5.hc_attn_scale": "model-00004-of-00026.safetensors", |
| "layers.5.hc_ffn_base": "model-00004-of-00026.safetensors", |
| "layers.5.hc_ffn_fn": "model-00004-of-00026.safetensors", |
| "layers.5.hc_ffn_scale": "model-00004-of-00026.safetensors", |
| "layers.6.attn.attn_sink": "model-00004-of-00026.safetensors", |
| "layers.6.attn.compressor.ape": "model-00004-of-00026.safetensors", |
| "layers.6.attn.compressor.norm.weight": "model-00004-of-00026.safetensors", |
| "layers.6.attn.compressor.wgate.biases": "model-00004-of-00026.safetensors", |
| "layers.6.attn.compressor.wgate.scales": "model-00004-of-00026.safetensors", |
| "layers.6.attn.compressor.wgate.weight": "model-00004-of-00026.safetensors", |
| "layers.6.attn.compressor.wkv.biases": "model-00004-of-00026.safetensors", |
| "layers.6.attn.compressor.wkv.scales": "model-00004-of-00026.safetensors", |
| "layers.6.attn.compressor.wkv.weight": "model-00004-of-00026.safetensors", |
| "layers.6.attn.indexer.compressor.ape": "model-00004-of-00026.safetensors", |
| "layers.6.attn.indexer.compressor.norm.weight": "model-00004-of-00026.safetensors", |
| "layers.6.attn.indexer.compressor.wgate.biases": "model-00004-of-00026.safetensors", |
| "layers.6.attn.indexer.compressor.wgate.scales": "model-00004-of-00026.safetensors", |
| "layers.6.attn.indexer.compressor.wgate.weight": "model-00004-of-00026.safetensors", |
| "layers.6.attn.indexer.compressor.wkv.biases": "model-00004-of-00026.safetensors", |
| "layers.6.attn.indexer.compressor.wkv.scales": "model-00004-of-00026.safetensors", |
| "layers.6.attn.indexer.compressor.wkv.weight": "model-00004-of-00026.safetensors", |
| "layers.6.attn.indexer.weights_proj.biases": "model-00004-of-00026.safetensors", |
| "layers.6.attn.indexer.weights_proj.scales": "model-00004-of-00026.safetensors", |
| "layers.6.attn.indexer.weights_proj.weight": "model-00004-of-00026.safetensors", |
| "layers.6.attn.indexer.wq_b.biases": "model-00004-of-00026.safetensors", |
| "layers.6.attn.indexer.wq_b.scales": "model-00004-of-00026.safetensors", |
| "layers.6.attn.indexer.wq_b.weight": "model-00004-of-00026.safetensors", |
| "layers.6.attn.kv_norm.weight": "model-00004-of-00026.safetensors", |
| "layers.6.attn.q_norm.weight": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wkv.biases": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wkv.scales": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wkv.weight": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.0.biases": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.0.scales": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.0.weight": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.1.biases": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.1.scales": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.1.weight": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.2.biases": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.2.scales": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.2.weight": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.3.biases": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.3.scales": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.3.weight": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.4.biases": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.4.scales": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.4.weight": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.5.biases": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.5.scales": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.5.weight": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.6.biases": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.6.scales": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.6.weight": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.7.biases": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.7.scales": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_a.7.weight": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_b.biases": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_b.scales": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wo_b.weight": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wq_a.biases": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wq_a.scales": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wq_a.weight": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wq_b.biases": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wq_b.scales": "model-00004-of-00026.safetensors", |
| "layers.6.attn.wq_b.weight": "model-00004-of-00026.safetensors", |
| "layers.6.attn_norm.weight": "model-00005-of-00026.safetensors", |
| "layers.6.ffn.experts.w1.biases": "model-00004-of-00026.safetensors", |
| "layers.6.ffn.experts.w1.scales": "model-00004-of-00026.safetensors", |
| "layers.6.ffn.experts.w1.weight": "model-00004-of-00026.safetensors", |
| "layers.6.ffn.experts.w2.biases": "model-00004-of-00026.safetensors", |
| "layers.6.ffn.experts.w2.scales": "model-00004-of-00026.safetensors", |
| "layers.6.ffn.experts.w2.weight": "model-00004-of-00026.safetensors", |
| "layers.6.ffn.experts.w3.biases": "model-00005-of-00026.safetensors", |
| "layers.6.ffn.experts.w3.scales": "model-00005-of-00026.safetensors", |
| "layers.6.ffn.experts.w3.weight": "model-00005-of-00026.safetensors", |
| "layers.6.ffn.gate.bias": "model-00004-of-00026.safetensors", |
| "layers.6.ffn.gate.weight": "model-00004-of-00026.safetensors", |
| "layers.6.ffn.shared_experts.w1.biases": "model-00005-of-00026.safetensors", |
| "layers.6.ffn.shared_experts.w1.scales": "model-00005-of-00026.safetensors", |
| "layers.6.ffn.shared_experts.w1.weight": "model-00005-of-00026.safetensors", |
| "layers.6.ffn.shared_experts.w2.biases": "model-00005-of-00026.safetensors", |
| "layers.6.ffn.shared_experts.w2.scales": "model-00005-of-00026.safetensors", |
| "layers.6.ffn.shared_experts.w2.weight": "model-00005-of-00026.safetensors", |
| "layers.6.ffn.shared_experts.w3.biases": "model-00005-of-00026.safetensors", |
| "layers.6.ffn.shared_experts.w3.scales": "model-00005-of-00026.safetensors", |
| "layers.6.ffn.shared_experts.w3.weight": "model-00005-of-00026.safetensors", |
| "layers.6.ffn_norm.weight": "model-00005-of-00026.safetensors", |
| "layers.6.hc_attn_base": "model-00005-of-00026.safetensors", |
| "layers.6.hc_attn_fn": "model-00005-of-00026.safetensors", |
| "layers.6.hc_attn_scale": "model-00005-of-00026.safetensors", |
| "layers.6.hc_ffn_base": "model-00005-of-00026.safetensors", |
| "layers.6.hc_ffn_fn": "model-00005-of-00026.safetensors", |
| "layers.6.hc_ffn_scale": "model-00005-of-00026.safetensors", |
| "layers.7.attn.attn_sink": "model-00005-of-00026.safetensors", |
| "layers.7.attn.compressor.ape": "model-00005-of-00026.safetensors", |
| "layers.7.attn.compressor.norm.weight": "model-00005-of-00026.safetensors", |
| "layers.7.attn.compressor.wgate.biases": "model-00005-of-00026.safetensors", |
| "layers.7.attn.compressor.wgate.scales": "model-00005-of-00026.safetensors", |
| "layers.7.attn.compressor.wgate.weight": "model-00005-of-00026.safetensors", |
| "layers.7.attn.compressor.wkv.biases": "model-00005-of-00026.safetensors", |
| "layers.7.attn.compressor.wkv.scales": "model-00005-of-00026.safetensors", |
| "layers.7.attn.compressor.wkv.weight": "model-00005-of-00026.safetensors", |
| "layers.7.attn.kv_norm.weight": "model-00005-of-00026.safetensors", |
| "layers.7.attn.q_norm.weight": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wkv.biases": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wkv.scales": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wkv.weight": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.0.biases": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.0.scales": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.0.weight": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.1.biases": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.1.scales": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.1.weight": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.2.biases": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.2.scales": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.2.weight": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.3.biases": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.3.scales": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.3.weight": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.4.biases": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.4.scales": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.4.weight": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.5.biases": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.5.scales": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.5.weight": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.6.biases": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.6.scales": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.6.weight": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.7.biases": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.7.scales": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_a.7.weight": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_b.biases": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_b.scales": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wo_b.weight": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wq_a.biases": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wq_a.scales": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wq_a.weight": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wq_b.biases": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wq_b.scales": "model-00005-of-00026.safetensors", |
| "layers.7.attn.wq_b.weight": "model-00005-of-00026.safetensors", |
| "layers.7.attn_norm.weight": "model-00005-of-00026.safetensors", |
| "layers.7.ffn.experts.w1.biases": "model-00005-of-00026.safetensors", |
| "layers.7.ffn.experts.w1.scales": "model-00005-of-00026.safetensors", |
| "layers.7.ffn.experts.w1.weight": "model-00005-of-00026.safetensors", |
| "layers.7.ffn.experts.w2.biases": "model-00005-of-00026.safetensors", |
| "layers.7.ffn.experts.w2.scales": "model-00005-of-00026.safetensors", |
| "layers.7.ffn.experts.w2.weight": "model-00005-of-00026.safetensors", |
| "layers.7.ffn.experts.w3.biases": "model-00005-of-00026.safetensors", |
| "layers.7.ffn.experts.w3.scales": "model-00005-of-00026.safetensors", |
| "layers.7.ffn.experts.w3.weight": "model-00005-of-00026.safetensors", |
| "layers.7.ffn.gate.bias": "model-00005-of-00026.safetensors", |
| "layers.7.ffn.gate.weight": "model-00005-of-00026.safetensors", |
| "layers.7.ffn.shared_experts.w1.biases": "model-00005-of-00026.safetensors", |
| "layers.7.ffn.shared_experts.w1.scales": "model-00005-of-00026.safetensors", |
| "layers.7.ffn.shared_experts.w1.weight": "model-00005-of-00026.safetensors", |
| "layers.7.ffn.shared_experts.w2.biases": "model-00005-of-00026.safetensors", |
| "layers.7.ffn.shared_experts.w2.scales": "model-00005-of-00026.safetensors", |
| "layers.7.ffn.shared_experts.w2.weight": "model-00005-of-00026.safetensors", |
| "layers.7.ffn.shared_experts.w3.biases": "model-00005-of-00026.safetensors", |
| "layers.7.ffn.shared_experts.w3.scales": "model-00005-of-00026.safetensors", |
| "layers.7.ffn.shared_experts.w3.weight": "model-00005-of-00026.safetensors", |
| "layers.7.ffn_norm.weight": "model-00005-of-00026.safetensors", |
| "layers.7.hc_attn_base": "model-00005-of-00026.safetensors", |
| "layers.7.hc_attn_fn": "model-00005-of-00026.safetensors", |
| "layers.7.hc_attn_scale": "model-00005-of-00026.safetensors", |
| "layers.7.hc_ffn_base": "model-00005-of-00026.safetensors", |
| "layers.7.hc_ffn_fn": "model-00005-of-00026.safetensors", |
| "layers.7.hc_ffn_scale": "model-00005-of-00026.safetensors", |
| "layers.8.attn.attn_sink": "model-00005-of-00026.safetensors", |
| "layers.8.attn.compressor.ape": "model-00005-of-00026.safetensors", |
| "layers.8.attn.compressor.norm.weight": "model-00005-of-00026.safetensors", |
| "layers.8.attn.compressor.wgate.biases": "model-00005-of-00026.safetensors", |
| "layers.8.attn.compressor.wgate.scales": "model-00005-of-00026.safetensors", |
| "layers.8.attn.compressor.wgate.weight": "model-00005-of-00026.safetensors", |
| "layers.8.attn.compressor.wkv.biases": "model-00005-of-00026.safetensors", |
| "layers.8.attn.compressor.wkv.scales": "model-00005-of-00026.safetensors", |
| "layers.8.attn.compressor.wkv.weight": "model-00005-of-00026.safetensors", |
| "layers.8.attn.indexer.compressor.ape": "model-00005-of-00026.safetensors", |
| "layers.8.attn.indexer.compressor.norm.weight": "model-00005-of-00026.safetensors", |
| "layers.8.attn.indexer.compressor.wgate.biases": "model-00005-of-00026.safetensors", |
| "layers.8.attn.indexer.compressor.wgate.scales": "model-00005-of-00026.safetensors", |
| "layers.8.attn.indexer.compressor.wgate.weight": "model-00005-of-00026.safetensors", |
| "layers.8.attn.indexer.compressor.wkv.biases": "model-00005-of-00026.safetensors", |
| "layers.8.attn.indexer.compressor.wkv.scales": "model-00005-of-00026.safetensors", |
| "layers.8.attn.indexer.compressor.wkv.weight": "model-00005-of-00026.safetensors", |
| "layers.8.attn.indexer.weights_proj.biases": "model-00005-of-00026.safetensors", |
| "layers.8.attn.indexer.weights_proj.scales": "model-00005-of-00026.safetensors", |
| "layers.8.attn.indexer.weights_proj.weight": "model-00005-of-00026.safetensors", |
| "layers.8.attn.indexer.wq_b.biases": "model-00005-of-00026.safetensors", |
| "layers.8.attn.indexer.wq_b.scales": "model-00005-of-00026.safetensors", |
| "layers.8.attn.indexer.wq_b.weight": "model-00005-of-00026.safetensors", |
| "layers.8.attn.kv_norm.weight": "model-00005-of-00026.safetensors", |
| "layers.8.attn.q_norm.weight": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wkv.biases": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wkv.scales": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wkv.weight": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.0.biases": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.0.scales": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.0.weight": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.1.biases": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.1.scales": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.1.weight": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.2.biases": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.2.scales": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.2.weight": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.3.biases": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.3.scales": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.3.weight": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.4.biases": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.4.scales": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.4.weight": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.5.biases": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.5.scales": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.5.weight": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.6.biases": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.6.scales": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.6.weight": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.7.biases": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.7.scales": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_a.7.weight": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_b.biases": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_b.scales": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wo_b.weight": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wq_a.biases": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wq_a.scales": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wq_a.weight": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wq_b.biases": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wq_b.scales": "model-00005-of-00026.safetensors", |
| "layers.8.attn.wq_b.weight": "model-00005-of-00026.safetensors", |
| "layers.8.attn_norm.weight": "model-00006-of-00026.safetensors", |
| "layers.8.ffn.experts.w1.biases": "model-00005-of-00026.safetensors", |
| "layers.8.ffn.experts.w1.scales": "model-00005-of-00026.safetensors", |
| "layers.8.ffn.experts.w1.weight": "model-00005-of-00026.safetensors", |
| "layers.8.ffn.experts.w2.biases": "model-00006-of-00026.safetensors", |
| "layers.8.ffn.experts.w2.scales": "model-00006-of-00026.safetensors", |
| "layers.8.ffn.experts.w2.weight": "model-00006-of-00026.safetensors", |
| "layers.8.ffn.experts.w3.biases": "model-00006-of-00026.safetensors", |
| "layers.8.ffn.experts.w3.scales": "model-00006-of-00026.safetensors", |
| "layers.8.ffn.experts.w3.weight": "model-00006-of-00026.safetensors", |
| "layers.8.ffn.gate.bias": "model-00005-of-00026.safetensors", |
| "layers.8.ffn.gate.weight": "model-00005-of-00026.safetensors", |
| "layers.8.ffn.shared_experts.w1.biases": "model-00006-of-00026.safetensors", |
| "layers.8.ffn.shared_experts.w1.scales": "model-00006-of-00026.safetensors", |
| "layers.8.ffn.shared_experts.w1.weight": "model-00006-of-00026.safetensors", |
| "layers.8.ffn.shared_experts.w2.biases": "model-00006-of-00026.safetensors", |
| "layers.8.ffn.shared_experts.w2.scales": "model-00006-of-00026.safetensors", |
| "layers.8.ffn.shared_experts.w2.weight": "model-00006-of-00026.safetensors", |
| "layers.8.ffn.shared_experts.w3.biases": "model-00006-of-00026.safetensors", |
| "layers.8.ffn.shared_experts.w3.scales": "model-00006-of-00026.safetensors", |
| "layers.8.ffn.shared_experts.w3.weight": "model-00006-of-00026.safetensors", |
| "layers.8.ffn_norm.weight": "model-00006-of-00026.safetensors", |
| "layers.8.hc_attn_base": "model-00006-of-00026.safetensors", |
| "layers.8.hc_attn_fn": "model-00006-of-00026.safetensors", |
| "layers.8.hc_attn_scale": "model-00006-of-00026.safetensors", |
| "layers.8.hc_ffn_base": "model-00006-of-00026.safetensors", |
| "layers.8.hc_ffn_fn": "model-00006-of-00026.safetensors", |
| "layers.8.hc_ffn_scale": "model-00006-of-00026.safetensors", |
| "layers.9.attn.attn_sink": "model-00006-of-00026.safetensors", |
| "layers.9.attn.compressor.ape": "model-00006-of-00026.safetensors", |
| "layers.9.attn.compressor.norm.weight": "model-00006-of-00026.safetensors", |
| "layers.9.attn.compressor.wgate.biases": "model-00006-of-00026.safetensors", |
| "layers.9.attn.compressor.wgate.scales": "model-00006-of-00026.safetensors", |
| "layers.9.attn.compressor.wgate.weight": "model-00006-of-00026.safetensors", |
| "layers.9.attn.compressor.wkv.biases": "model-00006-of-00026.safetensors", |
| "layers.9.attn.compressor.wkv.scales": "model-00006-of-00026.safetensors", |
| "layers.9.attn.compressor.wkv.weight": "model-00006-of-00026.safetensors", |
| "layers.9.attn.kv_norm.weight": "model-00006-of-00026.safetensors", |
| "layers.9.attn.q_norm.weight": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wkv.biases": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wkv.scales": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wkv.weight": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.0.biases": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.0.scales": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.0.weight": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.1.biases": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.1.scales": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.1.weight": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.2.biases": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.2.scales": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.2.weight": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.3.biases": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.3.scales": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.3.weight": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.4.biases": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.4.scales": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.4.weight": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.5.biases": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.5.scales": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.5.weight": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.6.biases": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.6.scales": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.6.weight": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.7.biases": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.7.scales": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_a.7.weight": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_b.biases": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_b.scales": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wo_b.weight": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wq_a.biases": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wq_a.scales": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wq_a.weight": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wq_b.biases": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wq_b.scales": "model-00006-of-00026.safetensors", |
| "layers.9.attn.wq_b.weight": "model-00006-of-00026.safetensors", |
| "layers.9.attn_norm.weight": "model-00006-of-00026.safetensors", |
| "layers.9.ffn.experts.w1.biases": "model-00006-of-00026.safetensors", |
| "layers.9.ffn.experts.w1.scales": "model-00006-of-00026.safetensors", |
| "layers.9.ffn.experts.w1.weight": "model-00006-of-00026.safetensors", |
| "layers.9.ffn.experts.w2.biases": "model-00006-of-00026.safetensors", |
| "layers.9.ffn.experts.w2.scales": "model-00006-of-00026.safetensors", |
| "layers.9.ffn.experts.w2.weight": "model-00006-of-00026.safetensors", |
| "layers.9.ffn.experts.w3.biases": "model-00006-of-00026.safetensors", |
| "layers.9.ffn.experts.w3.scales": "model-00006-of-00026.safetensors", |
| "layers.9.ffn.experts.w3.weight": "model-00006-of-00026.safetensors", |
| "layers.9.ffn.gate.bias": "model-00006-of-00026.safetensors", |
| "layers.9.ffn.gate.weight": "model-00006-of-00026.safetensors", |
| "layers.9.ffn.shared_experts.w1.biases": "model-00006-of-00026.safetensors", |
| "layers.9.ffn.shared_experts.w1.scales": "model-00006-of-00026.safetensors", |
| "layers.9.ffn.shared_experts.w1.weight": "model-00006-of-00026.safetensors", |
| "layers.9.ffn.shared_experts.w2.biases": "model-00006-of-00026.safetensors", |
| "layers.9.ffn.shared_experts.w2.scales": "model-00006-of-00026.safetensors", |
| "layers.9.ffn.shared_experts.w2.weight": "model-00006-of-00026.safetensors", |
| "layers.9.ffn.shared_experts.w3.biases": "model-00006-of-00026.safetensors", |
| "layers.9.ffn.shared_experts.w3.scales": "model-00006-of-00026.safetensors", |
| "layers.9.ffn.shared_experts.w3.weight": "model-00006-of-00026.safetensors", |
| "layers.9.ffn_norm.weight": "model-00006-of-00026.safetensors", |
| "layers.9.hc_attn_base": "model-00006-of-00026.safetensors", |
| "layers.9.hc_attn_fn": "model-00006-of-00026.safetensors", |
| "layers.9.hc_attn_scale": "model-00006-of-00026.safetensors", |
| "layers.9.hc_ffn_base": "model-00006-of-00026.safetensors", |
| "layers.9.hc_ffn_fn": "model-00006-of-00026.safetensors", |
| "layers.9.hc_ffn_scale": "model-00006-of-00026.safetensors", |
| "norm.weight": "model-00026-of-00026.safetensors" |
| } |
| } |