| { |
| "metadata": { |
| "ParamSize": 245, |
| "ParamBytes": 1181032448.0, |
| "BitsPerParam": 5.001434275387379 |
| }, |
| "records": [ |
| { |
| "dataPath": "params_shard_0.bin", |
| "format": "raw-shard", |
| "nbytes": 25956352, |
| "records": [ |
| { |
| "name": "model.layers.0.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.0.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 2097152 |
| }, |
| { |
| "name": "model.layers.0.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 2359296 |
| }, |
| { |
| "name": "model.layers.0.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 6553600 |
| }, |
| { |
| "name": "model.layers.0.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 7077888 |
| }, |
| { |
| "name": "model.layers.0.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 7081984 |
| }, |
| { |
| "name": "model.layers.0.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 23859200 |
| } |
| ], |
| "md5sum": "5982b1a0ff5d47fb49083abe86b92858" |
| }, |
| { |
| "dataPath": "params_shard_1.bin", |
| "format": "raw-shard", |
| "nbytes": 33300480, |
| "records": [ |
| { |
| "name": "model.layers.0.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.0.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 8388608 |
| }, |
| { |
| "name": "model.layers.0.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.1.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 9441280 |
| }, |
| { |
| "name": "model.layers.1.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 11538432 |
| }, |
| { |
| "name": "model.layers.1.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 11800576 |
| }, |
| { |
| "name": "model.layers.1.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 15994880 |
| }, |
| { |
| "name": "model.layers.1.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 16519168 |
| }, |
| { |
| "name": "model.layers.1.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 16523264 |
| } |
| ], |
| "md5sum": "64eb873ff68fc9122f879e0ab6caadcd" |
| }, |
| { |
| "dataPath": "params_shard_2.bin", |
| "format": "raw-shard", |
| "nbytes": 16777216, |
| "records": [ |
| { |
| "name": "model.layers.10.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3132911e8d81320356409a8468d7c5ce" |
| }, |
| { |
| "dataPath": "params_shard_3.bin", |
| "format": "raw-shard", |
| "nbytes": 32518144, |
| "records": [ |
| { |
| "name": "model.layers.1.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.1.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 2097152 |
| }, |
| { |
| "name": "model.layers.1.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 10485760 |
| }, |
| { |
| "name": "model.layers.1.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 11534336 |
| }, |
| { |
| "name": "model.layers.10.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 11538432 |
| }, |
| { |
| "name": "model.layers.10.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 13635584 |
| }, |
| { |
| "name": "model.layers.10.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 13897728 |
| }, |
| { |
| "name": "model.layers.10.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 18092032 |
| }, |
| { |
| "name": "model.layers.10.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 18616320 |
| }, |
| { |
| "name": "model.layers.10.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 18620416 |
| }, |
| { |
| "name": "model.layers.10.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 20717568 |
| }, |
| { |
| "name": "model.layers.10.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 29106176 |
| }, |
| { |
| "name": "model.layers.10.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 30154752 |
| }, |
| { |
| "name": "model.layers.11.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 30158848 |
| }, |
| { |
| "name": "model.layers.11.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 32256000 |
| } |
| ], |
| "md5sum": "0713e4f1d5591fe93fc08c70b7b98504" |
| }, |
| { |
| "dataPath": "params_shard_4.bin", |
| "format": "raw-shard", |
| "nbytes": 33038336, |
| "records": [ |
| { |
| "name": "model.layers.11.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.11.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 4194304 |
| }, |
| { |
| "name": "model.layers.11.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 4718592 |
| }, |
| { |
| "name": "model.layers.11.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 4722688 |
| }, |
| { |
| "name": "model.layers.11.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 21499904 |
| }, |
| { |
| "name": "model.layers.11.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 23597056 |
| }, |
| { |
| "name": "model.layers.11.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 31985664 |
| }, |
| { |
| "name": "model.layers.11.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 33034240 |
| } |
| ], |
| "md5sum": "aed4bb9286e179b2cd20bc66624fd49b" |
| }, |
| { |
| "dataPath": "params_shard_5.bin", |
| "format": "raw-shard", |
| "nbytes": 16777216, |
| "records": [ |
| { |
| "name": "model.layers.2.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0cdd09ac06863fd01405adaedfc0ed54" |
| }, |
| { |
| "dataPath": "params_shard_6.bin", |
| "format": "raw-shard", |
| "nbytes": 33034240, |
| "records": [ |
| { |
| "name": "model.layers.12.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.12.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 2097152 |
| }, |
| { |
| "name": "model.layers.12.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 2359296 |
| }, |
| { |
| "name": "model.layers.12.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 6553600 |
| }, |
| { |
| "name": "model.layers.12.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 7077888 |
| }, |
| { |
| "name": "model.layers.12.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 23855104 |
| }, |
| { |
| "name": "model.layers.2.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 25952256 |
| }, |
| { |
| "name": "model.layers.2.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 28049408 |
| }, |
| { |
| "name": "model.layers.2.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 28311552 |
| }, |
| { |
| "name": "model.layers.2.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 32505856 |
| }, |
| { |
| "name": "model.layers.2.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 33030144 |
| } |
| ], |
| "md5sum": "b071b00e03d7688201db6b4edc29f6d0" |
| }, |
| { |
| "dataPath": "params_shard_7.bin", |
| "format": "raw-shard", |
| "nbytes": 16777216, |
| "records": [ |
| { |
| "name": "model.layers.3.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f664ca5b613a1c0f280242ba05e9b0c7" |
| }, |
| { |
| "dataPath": "params_shard_8.bin", |
| "format": "raw-shard", |
| "nbytes": 32518144, |
| "records": [ |
| { |
| "name": "model.layers.2.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.2.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 2097152 |
| }, |
| { |
| "name": "model.layers.2.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 10485760 |
| }, |
| { |
| "name": "model.layers.2.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 11534336 |
| }, |
| { |
| "name": "model.layers.3.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 11538432 |
| }, |
| { |
| "name": "model.layers.3.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 13635584 |
| }, |
| { |
| "name": "model.layers.3.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 13897728 |
| }, |
| { |
| "name": "model.layers.3.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 18092032 |
| }, |
| { |
| "name": "model.layers.3.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 18616320 |
| }, |
| { |
| "name": "model.layers.3.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 18620416 |
| }, |
| { |
| "name": "model.layers.3.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 20717568 |
| }, |
| { |
| "name": "model.layers.3.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 29106176 |
| }, |
| { |
| "name": "model.layers.3.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 30154752 |
| }, |
| { |
| "name": "model.layers.4.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 30158848 |
| }, |
| { |
| "name": "model.layers.4.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 32256000 |
| } |
| ], |
| "md5sum": "7e085924b79dc127318a7fe0e9d54d84" |
| }, |
| { |
| "dataPath": "params_shard_9.bin", |
| "format": "raw-shard", |
| "nbytes": 33038336, |
| "records": [ |
| { |
| "name": "model.layers.4.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.4.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 4194304 |
| }, |
| { |
| "name": "model.layers.4.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 4718592 |
| }, |
| { |
| "name": "model.layers.4.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 4722688 |
| }, |
| { |
| "name": "model.layers.4.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 21499904 |
| }, |
| { |
| "name": "model.layers.4.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 23597056 |
| }, |
| { |
| "name": "model.layers.4.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 31985664 |
| }, |
| { |
| "name": "model.layers.4.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 33034240 |
| } |
| ], |
| "md5sum": "d9bd5f3ad5f5628af8466af224677f47" |
| }, |
| { |
| "dataPath": "params_shard_10.bin", |
| "format": "raw-shard", |
| "nbytes": 25956352, |
| "records": [ |
| { |
| "name": "model.layers.5.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.5.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 2097152 |
| }, |
| { |
| "name": "model.layers.5.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 2359296 |
| }, |
| { |
| "name": "model.layers.5.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 6553600 |
| }, |
| { |
| "name": "model.layers.5.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 7077888 |
| }, |
| { |
| "name": "model.layers.5.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 7081984 |
| }, |
| { |
| "name": "model.layers.5.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 23859200 |
| } |
| ], |
| "md5sum": "7dc76a6ffeda959a9c82df5c246aa93e" |
| }, |
| { |
| "dataPath": "params_shard_11.bin", |
| "format": "raw-shard", |
| "nbytes": 33300480, |
| "records": [ |
| { |
| "name": "model.layers.5.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.5.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 8388608 |
| }, |
| { |
| "name": "model.layers.5.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.6.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 9441280 |
| }, |
| { |
| "name": "model.layers.6.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 11538432 |
| }, |
| { |
| "name": "model.layers.6.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 11800576 |
| }, |
| { |
| "name": "model.layers.6.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 15994880 |
| }, |
| { |
| "name": "model.layers.6.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 16519168 |
| }, |
| { |
| "name": "model.layers.6.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 16523264 |
| } |
| ], |
| "md5sum": "2ef931e5e99fb8573d612e5d70dab708" |
| }, |
| { |
| "dataPath": "params_shard_12.bin", |
| "format": "raw-shard", |
| "nbytes": 16777216, |
| "records": [ |
| { |
| "name": "model.layers.7.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "030743fe289e724247fd7adf38008118" |
| }, |
| { |
| "dataPath": "params_shard_13.bin", |
| "format": "raw-shard", |
| "nbytes": 32518144, |
| "records": [ |
| { |
| "name": "model.layers.6.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.6.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 2097152 |
| }, |
| { |
| "name": "model.layers.6.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 10485760 |
| }, |
| { |
| "name": "model.layers.6.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 11534336 |
| }, |
| { |
| "name": "model.layers.7.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 11538432 |
| }, |
| { |
| "name": "model.layers.7.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 13635584 |
| }, |
| { |
| "name": "model.layers.7.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 13897728 |
| }, |
| { |
| "name": "model.layers.7.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 18092032 |
| }, |
| { |
| "name": "model.layers.7.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 18616320 |
| }, |
| { |
| "name": "model.layers.7.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 18620416 |
| }, |
| { |
| "name": "model.layers.7.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 20717568 |
| }, |
| { |
| "name": "model.layers.7.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 29106176 |
| }, |
| { |
| "name": "model.layers.7.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 30154752 |
| }, |
| { |
| "name": "model.layers.8.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 30158848 |
| }, |
| { |
| "name": "model.layers.8.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 32256000 |
| } |
| ], |
| "md5sum": "b375e3d842f235e9a3396a866363e7a9" |
| }, |
| { |
| "dataPath": "params_shard_14.bin", |
| "format": "raw-shard", |
| "nbytes": 33038336, |
| "records": [ |
| { |
| "name": "model.layers.8.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.8.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 4194304 |
| }, |
| { |
| "name": "model.layers.8.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 4718592 |
| }, |
| { |
| "name": "model.layers.8.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 4722688 |
| }, |
| { |
| "name": "model.layers.8.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 21499904 |
| }, |
| { |
| "name": "model.layers.8.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 23597056 |
| }, |
| { |
| "name": "model.layers.8.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 31985664 |
| }, |
| { |
| "name": "model.layers.8.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 33034240 |
| } |
| ], |
| "md5sum": "12df07ae8fe2a1d9bb0f183eb87117ed" |
| }, |
| { |
| "dataPath": "params_shard_15.bin", |
| "format": "raw-shard", |
| "nbytes": 25956352, |
| "records": [ |
| { |
| "name": "model.layers.9.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.9.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 2097152 |
| }, |
| { |
| "name": "model.layers.9.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 2359296 |
| }, |
| { |
| "name": "model.layers.9.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 6553600 |
| }, |
| { |
| "name": "model.layers.9.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 7077888 |
| }, |
| { |
| "name": "model.layers.9.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 7081984 |
| }, |
| { |
| "name": "model.layers.9.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 23859200 |
| } |
| ], |
| "md5sum": "08b5608ddf4672a3df93f00959e9a675" |
| }, |
| { |
| "dataPath": "params_shard_16.bin", |
| "format": "raw-shard", |
| "nbytes": 94765056, |
| "records": [ |
| { |
| "name": "model.tok_embeddings.q_weight", |
| "shape": [ |
| 92544, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 94765056, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "67dc2498aa1a20270d63e6b745a9a499" |
| }, |
| { |
| "dataPath": "params_shard_17.bin", |
| "format": "raw-shard", |
| "nbytes": 33091584, |
| "records": [ |
| { |
| "name": "model.layers.9.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.9.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 8388608 |
| }, |
| { |
| "name": "model.layers.9.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.tok_embeddings.q_scale", |
| "shape": [ |
| 92544, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 11845632, |
| "byteOffset": 9441280 |
| }, |
| { |
| "name": "model.layers.12.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 21286912 |
| }, |
| { |
| "name": "model.layers.12.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 21291008 |
| }, |
| { |
| "name": "model.layers.12.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 29679616 |
| }, |
| { |
| "name": "model.layers.12.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 30728192 |
| }, |
| { |
| "name": "model.layers.13.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 30732288 |
| }, |
| { |
| "name": "model.layers.13.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 32829440 |
| } |
| ], |
| "md5sum": "4b990590c06ffcc04429297bac1e57ad" |
| }, |
| { |
| "dataPath": "params_shard_18.bin", |
| "format": "raw-shard", |
| "nbytes": 33038336, |
| "records": [ |
| { |
| "name": "model.layers.13.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.13.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 4194304 |
| }, |
| { |
| "name": "model.layers.13.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 4718592 |
| }, |
| { |
| "name": "model.layers.13.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 4722688 |
| }, |
| { |
| "name": "model.layers.13.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 21499904 |
| }, |
| { |
| "name": "model.layers.13.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 23597056 |
| }, |
| { |
| "name": "model.layers.13.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 31985664 |
| }, |
| { |
| "name": "model.layers.13.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 33034240 |
| } |
| ], |
| "md5sum": "a6774268678737ecada36ccdabfcaea7" |
| }, |
| { |
| "dataPath": "params_shard_19.bin", |
| "format": "raw-shard", |
| "nbytes": 25956352, |
| "records": [ |
| { |
| "name": "model.layers.14.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.14.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 2097152 |
| }, |
| { |
| "name": "model.layers.14.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 2359296 |
| }, |
| { |
| "name": "model.layers.14.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 6553600 |
| }, |
| { |
| "name": "model.layers.14.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 7077888 |
| }, |
| { |
| "name": "model.layers.14.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 7081984 |
| }, |
| { |
| "name": "model.layers.14.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 23859200 |
| } |
| ], |
| "md5sum": "a5c18cea65123d13caaa92f5ac458867" |
| }, |
| { |
| "dataPath": "params_shard_20.bin", |
| "format": "raw-shard", |
| "nbytes": 33300480, |
| "records": [ |
| { |
| "name": "model.layers.14.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.14.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 8388608 |
| }, |
| { |
| "name": "model.layers.14.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.15.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 9441280 |
| }, |
| { |
| "name": "model.layers.15.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 11538432 |
| }, |
| { |
| "name": "model.layers.15.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 11800576 |
| }, |
| { |
| "name": "model.layers.15.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 15994880 |
| }, |
| { |
| "name": "model.layers.15.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 16519168 |
| }, |
| { |
| "name": "model.layers.15.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 16523264 |
| } |
| ], |
| "md5sum": "197ab45a9aa160a5e8285e210e8159f1" |
| }, |
| { |
| "dataPath": "params_shard_21.bin", |
| "format": "raw-shard", |
| "nbytes": 16777216, |
| "records": [ |
| { |
| "name": "model.layers.16.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a4c38f0335d1fef1d9cf0c21831030c2" |
| }, |
| { |
| "dataPath": "params_shard_22.bin", |
| "format": "raw-shard", |
| "nbytes": 32518144, |
| "records": [ |
| { |
| "name": "model.layers.15.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.15.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 2097152 |
| }, |
| { |
| "name": "model.layers.15.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 10485760 |
| }, |
| { |
| "name": "model.layers.15.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 11534336 |
| }, |
| { |
| "name": "model.layers.16.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 11538432 |
| }, |
| { |
| "name": "model.layers.16.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 13635584 |
| }, |
| { |
| "name": "model.layers.16.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 13897728 |
| }, |
| { |
| "name": "model.layers.16.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 18092032 |
| }, |
| { |
| "name": "model.layers.16.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 18616320 |
| }, |
| { |
| "name": "model.layers.16.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 18620416 |
| }, |
| { |
| "name": "model.layers.16.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 20717568 |
| }, |
| { |
| "name": "model.layers.16.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 29106176 |
| }, |
| { |
| "name": "model.layers.16.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 30154752 |
| }, |
| { |
| "name": "model.layers.17.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 30158848 |
| }, |
| { |
| "name": "model.layers.17.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 32256000 |
| } |
| ], |
| "md5sum": "6cbe86517dbc07beb965d42d11021637" |
| }, |
| { |
| "dataPath": "params_shard_23.bin", |
| "format": "raw-shard", |
| "nbytes": 33038336, |
| "records": [ |
| { |
| "name": "model.layers.17.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.17.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 4194304 |
| }, |
| { |
| "name": "model.layers.17.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 4718592 |
| }, |
| { |
| "name": "model.layers.17.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 4722688 |
| }, |
| { |
| "name": "model.layers.17.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 21499904 |
| }, |
| { |
| "name": "model.layers.17.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 23597056 |
| }, |
| { |
| "name": "model.layers.17.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 31985664 |
| }, |
| { |
| "name": "model.layers.17.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 33034240 |
| } |
| ], |
| "md5sum": "68618a00e2d3d59f52964b96673be173" |
| }, |
| { |
| "dataPath": "params_shard_24.bin", |
| "format": "raw-shard", |
| "nbytes": 25956352, |
| "records": [ |
| { |
| "name": "model.layers.18.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.18.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 2097152 |
| }, |
| { |
| "name": "model.layers.18.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 2359296 |
| }, |
| { |
| "name": "model.layers.18.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 6553600 |
| }, |
| { |
| "name": "model.layers.18.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 7077888 |
| }, |
| { |
| "name": "model.layers.18.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 7081984 |
| }, |
| { |
| "name": "model.layers.18.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 23859200 |
| } |
| ], |
| "md5sum": "26d6b23b1aa233c466f97fcb4b8e294a" |
| }, |
| { |
| "dataPath": "params_shard_25.bin", |
| "format": "raw-shard", |
| "nbytes": 33300480, |
| "records": [ |
| { |
| "name": "model.layers.18.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.18.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 8388608 |
| }, |
| { |
| "name": "model.layers.18.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.19.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 9441280 |
| }, |
| { |
| "name": "model.layers.19.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 11538432 |
| }, |
| { |
| "name": "model.layers.19.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 11800576 |
| }, |
| { |
| "name": "model.layers.19.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 15994880 |
| }, |
| { |
| "name": "model.layers.19.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 16519168 |
| }, |
| { |
| "name": "model.layers.19.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 16523264 |
| } |
| ], |
| "md5sum": "861fd000bb0abff1040f3a108ad35fd9" |
| }, |
| { |
| "dataPath": "params_shard_26.bin", |
| "format": "raw-shard", |
| "nbytes": 16777216, |
| "records": [ |
| { |
| "name": "model.layers.20.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ab0b73669c4bb8a74f61883ef5830e76" |
| }, |
| { |
| "dataPath": "params_shard_27.bin", |
| "format": "raw-shard", |
| "nbytes": 32518144, |
| "records": [ |
| { |
| "name": "model.layers.19.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.19.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 2097152 |
| }, |
| { |
| "name": "model.layers.19.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 10485760 |
| }, |
| { |
| "name": "model.layers.19.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 11534336 |
| }, |
| { |
| "name": "model.layers.20.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 11538432 |
| }, |
| { |
| "name": "model.layers.20.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 13635584 |
| }, |
| { |
| "name": "model.layers.20.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 13897728 |
| }, |
| { |
| "name": "model.layers.20.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 18092032 |
| }, |
| { |
| "name": "model.layers.20.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 18616320 |
| }, |
| { |
| "name": "model.layers.20.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 18620416 |
| }, |
| { |
| "name": "model.layers.20.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 20717568 |
| }, |
| { |
| "name": "model.layers.20.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 29106176 |
| }, |
| { |
| "name": "model.layers.20.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 30154752 |
| }, |
| { |
| "name": "model.layers.21.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 30158848 |
| }, |
| { |
| "name": "model.layers.21.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 32256000 |
| } |
| ], |
| "md5sum": "81c0185dcc3d99d0bc4ecdce74544249" |
| }, |
| { |
| "dataPath": "params_shard_28.bin", |
| "format": "raw-shard", |
| "nbytes": 33038336, |
| "records": [ |
| { |
| "name": "model.layers.21.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.21.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 4194304 |
| }, |
| { |
| "name": "model.layers.21.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 4718592 |
| }, |
| { |
| "name": "model.layers.21.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 4722688 |
| }, |
| { |
| "name": "model.layers.21.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 21499904 |
| }, |
| { |
| "name": "model.layers.21.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 23597056 |
| }, |
| { |
| "name": "model.layers.21.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 31985664 |
| }, |
| { |
| "name": "model.layers.21.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 33034240 |
| } |
| ], |
| "md5sum": "4aafcf706d6f0cbdae9380482acbc626" |
| }, |
| { |
| "dataPath": "params_shard_29.bin", |
| "format": "raw-shard", |
| "nbytes": 25956352, |
| "records": [ |
| { |
| "name": "model.layers.22.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.22.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 2097152 |
| }, |
| { |
| "name": "model.layers.22.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 2359296 |
| }, |
| { |
| "name": "model.layers.22.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 6553600 |
| }, |
| { |
| "name": "model.layers.22.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 7077888 |
| }, |
| { |
| "name": "model.layers.22.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 7081984 |
| }, |
| { |
| "name": "model.layers.22.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 23859200 |
| } |
| ], |
| "md5sum": "3cd1fd3dba69c5fae94777943a2870e8" |
| }, |
| { |
| "dataPath": "params_shard_30.bin", |
| "format": "raw-shard", |
| "nbytes": 33300480, |
| "records": [ |
| { |
| "name": "model.layers.22.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.22.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 8388608 |
| }, |
| { |
| "name": "model.layers.22.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "model.layers.23.attention.wo.q_weight", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 9441280 |
| }, |
| { |
| "name": "model.layers.23.attention.wo.q_scale", |
| "shape": [ |
| 2048, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 262144, |
| "byteOffset": 11538432 |
| }, |
| { |
| "name": "model.layers.23.attention.wqkv.q_weight", |
| "shape": [ |
| 4096, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 4194304, |
| "byteOffset": 11800576 |
| }, |
| { |
| "name": "model.layers.23.attention.wqkv.q_scale", |
| "shape": [ |
| 4096, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 524288, |
| "byteOffset": 15994880 |
| }, |
| { |
| "name": "model.layers.23.attention_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 16519168 |
| }, |
| { |
| "name": "model.layers.23.feed_forward.gate_up_proj.q_weight", |
| "shape": [ |
| 16384, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 16777216, |
| "byteOffset": 16523264 |
| } |
| ], |
| "md5sum": "5d7c1eb949fa2bf4a6ca95dad376adc8" |
| }, |
| { |
| "dataPath": "params_shard_31.bin", |
| "format": "raw-shard", |
| "nbytes": 94765056, |
| "records": [ |
| { |
| "name": "output.q_weight", |
| "shape": [ |
| 92544, |
| 256 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 94765056, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5da990f9733817e5aa84b925a3c1d55e" |
| }, |
| { |
| "dataPath": "params_shard_32.bin", |
| "format": "raw-shard", |
| "nbytes": 23388160, |
| "records": [ |
| { |
| "name": "model.layers.23.feed_forward.gate_up_proj.q_scale", |
| "shape": [ |
| 16384, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 2097152, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.layers.23.feed_forward.w2.q_weight", |
| "shape": [ |
| 2048, |
| 1024 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 8388608, |
| "byteOffset": 2097152 |
| }, |
| { |
| "name": "model.layers.23.feed_forward.w2.q_scale", |
| "shape": [ |
| 2048, |
| 256 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 1048576, |
| "byteOffset": 10485760 |
| }, |
| { |
| "name": "model.layers.23.ffn_norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 11534336 |
| }, |
| { |
| "name": "model.norm.weight", |
| "shape": [ |
| 2048 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 4096, |
| "byteOffset": 11538432 |
| }, |
| { |
| "name": "output.q_scale", |
| "shape": [ |
| 92544, |
| 64 |
| ], |
| "dtype": "float32", |
| "format": "f32-to-bf16", |
| "nbytes": 11845632, |
| "byteOffset": 11542528 |
| } |
| ], |
| "md5sum": "0bcee4e0f8bc115d91dd47c221506095" |
| } |
| ] |
| } |