NexVeridian commited on
Commit
e834e10
·
verified ·
1 Parent(s): aba1b56

Add files using upload-large-folder tool

Browse files
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c41e086a492dd78167ed830094ddfc38fdb5766e500be38e7361f84eaa9dfbaf
3
- size 3796838947
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55ae5326e43f917cf839dfdd4f9790dd24a992a8e70353a3707916cb7f8e4d21
3
+ size 3752083673
model.safetensors.index.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "metadata": {
3
- "total_size": 6109102676,
4
- "total_parameters": 7518068992
5
  },
6
  "weight_map": {
7
  "language_model.model.embed_tokens.biases": "model-00001-of-00002.safetensors",
@@ -626,10 +626,6 @@
626
  "language_model.model.layers.24.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
627
  "language_model.model.layers.24.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
628
  "language_model.model.layers.24.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
629
- "language_model.model.layers.24.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
630
- "language_model.model.layers.24.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
631
- "language_model.model.layers.24.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
632
- "language_model.model.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
633
  "language_model.model.layers.24.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
634
  "language_model.model.layers.24.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
635
  "language_model.model.layers.24.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
@@ -637,9 +633,6 @@
637
  "language_model.model.layers.24.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
638
  "language_model.model.layers.24.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
639
  "language_model.model.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
640
- "language_model.model.layers.24.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
641
- "language_model.model.layers.24.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
642
- "language_model.model.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
643
  "language_model.model.layers.25.input_layernorm.weight": "model-00001-of-00002.safetensors",
644
  "language_model.model.layers.25.layer_scalar": "model-00001-of-00002.safetensors",
645
  "language_model.model.layers.25.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
@@ -661,10 +654,6 @@
661
  "language_model.model.layers.25.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
662
  "language_model.model.layers.25.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
663
  "language_model.model.layers.25.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
664
- "language_model.model.layers.25.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
665
- "language_model.model.layers.25.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
666
- "language_model.model.layers.25.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
667
- "language_model.model.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
668
  "language_model.model.layers.25.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
669
  "language_model.model.layers.25.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
670
  "language_model.model.layers.25.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
@@ -672,9 +661,6 @@
672
  "language_model.model.layers.25.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
673
  "language_model.model.layers.25.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
674
  "language_model.model.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
675
- "language_model.model.layers.25.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
676
- "language_model.model.layers.25.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
677
- "language_model.model.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
678
  "language_model.model.layers.26.input_layernorm.weight": "model-00001-of-00002.safetensors",
679
  "language_model.model.layers.26.layer_scalar": "model-00001-of-00002.safetensors",
680
  "language_model.model.layers.26.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
@@ -696,10 +682,6 @@
696
  "language_model.model.layers.26.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
697
  "language_model.model.layers.26.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
698
  "language_model.model.layers.26.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
699
- "language_model.model.layers.26.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
700
- "language_model.model.layers.26.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
701
- "language_model.model.layers.26.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
702
- "language_model.model.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
703
  "language_model.model.layers.26.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
704
  "language_model.model.layers.26.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
705
  "language_model.model.layers.26.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
@@ -707,9 +689,6 @@
707
  "language_model.model.layers.26.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
708
  "language_model.model.layers.26.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
709
  "language_model.model.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
710
- "language_model.model.layers.26.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
711
- "language_model.model.layers.26.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
712
- "language_model.model.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
713
  "language_model.model.layers.27.input_layernorm.weight": "model-00001-of-00002.safetensors",
714
  "language_model.model.layers.27.layer_scalar": "model-00001-of-00002.safetensors",
715
  "language_model.model.layers.27.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
@@ -731,10 +710,6 @@
731
  "language_model.model.layers.27.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
732
  "language_model.model.layers.27.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
733
  "language_model.model.layers.27.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
734
- "language_model.model.layers.27.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
735
- "language_model.model.layers.27.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
736
- "language_model.model.layers.27.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
737
- "language_model.model.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
738
  "language_model.model.layers.27.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
739
  "language_model.model.layers.27.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
740
  "language_model.model.layers.27.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
@@ -742,9 +717,6 @@
742
  "language_model.model.layers.27.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
743
  "language_model.model.layers.27.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
744
  "language_model.model.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
745
- "language_model.model.layers.27.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
746
- "language_model.model.layers.27.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
747
- "language_model.model.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
748
  "language_model.model.layers.28.input_layernorm.weight": "model-00001-of-00002.safetensors",
749
  "language_model.model.layers.28.layer_scalar": "model-00001-of-00002.safetensors",
750
  "language_model.model.layers.28.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
@@ -766,10 +738,6 @@
766
  "language_model.model.layers.28.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
767
  "language_model.model.layers.28.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
768
  "language_model.model.layers.28.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
769
- "language_model.model.layers.28.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
770
- "language_model.model.layers.28.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
771
- "language_model.model.layers.28.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
772
- "language_model.model.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
773
  "language_model.model.layers.28.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
774
  "language_model.model.layers.28.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
775
  "language_model.model.layers.28.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
@@ -777,9 +745,6 @@
777
  "language_model.model.layers.28.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
778
  "language_model.model.layers.28.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
779
  "language_model.model.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
780
- "language_model.model.layers.28.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
781
- "language_model.model.layers.28.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
782
- "language_model.model.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
783
  "language_model.model.layers.29.input_layernorm.weight": "model-00001-of-00002.safetensors",
784
  "language_model.model.layers.29.layer_scalar": "model-00001-of-00002.safetensors",
785
  "language_model.model.layers.29.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
@@ -801,10 +766,6 @@
801
  "language_model.model.layers.29.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
802
  "language_model.model.layers.29.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
803
  "language_model.model.layers.29.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
804
- "language_model.model.layers.29.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
805
- "language_model.model.layers.29.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
806
- "language_model.model.layers.29.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
807
- "language_model.model.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
808
  "language_model.model.layers.29.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
809
  "language_model.model.layers.29.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
810
  "language_model.model.layers.29.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
@@ -812,9 +773,6 @@
812
  "language_model.model.layers.29.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
813
  "language_model.model.layers.29.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
814
  "language_model.model.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
815
- "language_model.model.layers.29.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
816
- "language_model.model.layers.29.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
817
- "language_model.model.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
818
  "language_model.model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
819
  "language_model.model.layers.3.layer_scalar": "model-00001-of-00002.safetensors",
820
  "language_model.model.layers.3.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
@@ -871,10 +829,6 @@
871
  "language_model.model.layers.30.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
872
  "language_model.model.layers.30.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
873
  "language_model.model.layers.30.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
874
- "language_model.model.layers.30.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
875
- "language_model.model.layers.30.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
876
- "language_model.model.layers.30.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
877
- "language_model.model.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
878
  "language_model.model.layers.30.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
879
  "language_model.model.layers.30.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
880
  "language_model.model.layers.30.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
@@ -882,9 +836,6 @@
882
  "language_model.model.layers.30.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
883
  "language_model.model.layers.30.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
884
  "language_model.model.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
885
- "language_model.model.layers.30.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
886
- "language_model.model.layers.30.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
887
- "language_model.model.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
888
  "language_model.model.layers.31.input_layernorm.weight": "model-00001-of-00002.safetensors",
889
  "language_model.model.layers.31.layer_scalar": "model-00001-of-00002.safetensors",
890
  "language_model.model.layers.31.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
@@ -906,10 +857,6 @@
906
  "language_model.model.layers.31.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
907
  "language_model.model.layers.31.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
908
  "language_model.model.layers.31.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
909
- "language_model.model.layers.31.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
910
- "language_model.model.layers.31.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
911
- "language_model.model.layers.31.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
912
- "language_model.model.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
913
  "language_model.model.layers.31.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
914
  "language_model.model.layers.31.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
915
  "language_model.model.layers.31.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
@@ -917,9 +864,6 @@
917
  "language_model.model.layers.31.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
918
  "language_model.model.layers.31.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
919
  "language_model.model.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
920
- "language_model.model.layers.31.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
921
- "language_model.model.layers.31.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
922
- "language_model.model.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
923
  "language_model.model.layers.32.input_layernorm.weight": "model-00001-of-00002.safetensors",
924
  "language_model.model.layers.32.layer_scalar": "model-00001-of-00002.safetensors",
925
  "language_model.model.layers.32.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
@@ -941,10 +885,6 @@
941
  "language_model.model.layers.32.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
942
  "language_model.model.layers.32.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
943
  "language_model.model.layers.32.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
944
- "language_model.model.layers.32.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
945
- "language_model.model.layers.32.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
946
- "language_model.model.layers.32.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
947
- "language_model.model.layers.32.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
948
  "language_model.model.layers.32.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
949
  "language_model.model.layers.32.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
950
  "language_model.model.layers.32.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
@@ -952,9 +892,6 @@
952
  "language_model.model.layers.32.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
953
  "language_model.model.layers.32.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
954
  "language_model.model.layers.32.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
955
- "language_model.model.layers.32.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
956
- "language_model.model.layers.32.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
957
- "language_model.model.layers.32.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
958
  "language_model.model.layers.33.input_layernorm.weight": "model-00001-of-00002.safetensors",
959
  "language_model.model.layers.33.layer_scalar": "model-00001-of-00002.safetensors",
960
  "language_model.model.layers.33.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
@@ -976,10 +913,6 @@
976
  "language_model.model.layers.33.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
977
  "language_model.model.layers.33.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
978
  "language_model.model.layers.33.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
979
- "language_model.model.layers.33.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
980
- "language_model.model.layers.33.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
981
- "language_model.model.layers.33.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
982
- "language_model.model.layers.33.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
983
  "language_model.model.layers.33.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
984
  "language_model.model.layers.33.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
985
  "language_model.model.layers.33.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
@@ -987,9 +920,6 @@
987
  "language_model.model.layers.33.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
988
  "language_model.model.layers.33.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
989
  "language_model.model.layers.33.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
990
- "language_model.model.layers.33.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
991
- "language_model.model.layers.33.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
992
- "language_model.model.layers.33.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
993
  "language_model.model.layers.34.input_layernorm.weight": "model-00001-of-00002.safetensors",
994
  "language_model.model.layers.34.layer_scalar": "model-00001-of-00002.safetensors",
995
  "language_model.model.layers.34.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
@@ -1011,10 +941,6 @@
1011
  "language_model.model.layers.34.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1012
  "language_model.model.layers.34.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
1013
  "language_model.model.layers.34.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1014
- "language_model.model.layers.34.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1015
- "language_model.model.layers.34.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
1016
- "language_model.model.layers.34.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
1017
- "language_model.model.layers.34.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
1018
  "language_model.model.layers.34.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
1019
  "language_model.model.layers.34.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
1020
  "language_model.model.layers.34.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
@@ -1022,9 +948,6 @@
1022
  "language_model.model.layers.34.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
1023
  "language_model.model.layers.34.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
1024
  "language_model.model.layers.34.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
1025
- "language_model.model.layers.34.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
1026
- "language_model.model.layers.34.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
1027
- "language_model.model.layers.34.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
1028
  "language_model.model.layers.35.input_layernorm.weight": "model-00001-of-00002.safetensors",
1029
  "language_model.model.layers.35.layer_scalar": "model-00001-of-00002.safetensors",
1030
  "language_model.model.layers.35.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
@@ -1046,10 +969,6 @@
1046
  "language_model.model.layers.35.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1047
  "language_model.model.layers.35.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
1048
  "language_model.model.layers.35.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1049
- "language_model.model.layers.35.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1050
- "language_model.model.layers.35.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
1051
- "language_model.model.layers.35.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
1052
- "language_model.model.layers.35.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
1053
  "language_model.model.layers.35.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
1054
  "language_model.model.layers.35.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
1055
  "language_model.model.layers.35.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
@@ -1057,9 +976,6 @@
1057
  "language_model.model.layers.35.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
1058
  "language_model.model.layers.35.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
1059
  "language_model.model.layers.35.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
1060
- "language_model.model.layers.35.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
1061
- "language_model.model.layers.35.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
1062
- "language_model.model.layers.35.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
1063
  "language_model.model.layers.36.input_layernorm.weight": "model-00001-of-00002.safetensors",
1064
  "language_model.model.layers.36.layer_scalar": "model-00001-of-00002.safetensors",
1065
  "language_model.model.layers.36.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
@@ -1081,10 +997,6 @@
1081
  "language_model.model.layers.36.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1082
  "language_model.model.layers.36.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
1083
  "language_model.model.layers.36.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1084
- "language_model.model.layers.36.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1085
- "language_model.model.layers.36.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
1086
- "language_model.model.layers.36.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
1087
- "language_model.model.layers.36.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
1088
  "language_model.model.layers.36.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
1089
  "language_model.model.layers.36.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
1090
  "language_model.model.layers.36.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
@@ -1092,9 +1004,6 @@
1092
  "language_model.model.layers.36.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
1093
  "language_model.model.layers.36.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
1094
  "language_model.model.layers.36.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
1095
- "language_model.model.layers.36.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
1096
- "language_model.model.layers.36.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
1097
- "language_model.model.layers.36.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
1098
  "language_model.model.layers.37.input_layernorm.weight": "model-00001-of-00002.safetensors",
1099
  "language_model.model.layers.37.layer_scalar": "model-00001-of-00002.safetensors",
1100
  "language_model.model.layers.37.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
@@ -1116,10 +1025,6 @@
1116
  "language_model.model.layers.37.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1117
  "language_model.model.layers.37.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
1118
  "language_model.model.layers.37.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1119
- "language_model.model.layers.37.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1120
- "language_model.model.layers.37.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
1121
- "language_model.model.layers.37.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
1122
- "language_model.model.layers.37.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
1123
  "language_model.model.layers.37.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
1124
  "language_model.model.layers.37.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
1125
  "language_model.model.layers.37.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
@@ -1127,9 +1032,6 @@
1127
  "language_model.model.layers.37.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
1128
  "language_model.model.layers.37.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
1129
  "language_model.model.layers.37.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
1130
- "language_model.model.layers.37.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
1131
- "language_model.model.layers.37.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
1132
- "language_model.model.layers.37.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
1133
  "language_model.model.layers.38.input_layernorm.weight": "model-00001-of-00002.safetensors",
1134
  "language_model.model.layers.38.layer_scalar": "model-00001-of-00002.safetensors",
1135
  "language_model.model.layers.38.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
@@ -1151,10 +1053,6 @@
1151
  "language_model.model.layers.38.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1152
  "language_model.model.layers.38.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
1153
  "language_model.model.layers.38.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1154
- "language_model.model.layers.38.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1155
- "language_model.model.layers.38.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
1156
- "language_model.model.layers.38.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
1157
- "language_model.model.layers.38.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
1158
  "language_model.model.layers.38.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
1159
  "language_model.model.layers.38.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
1160
  "language_model.model.layers.38.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
@@ -1162,9 +1060,6 @@
1162
  "language_model.model.layers.38.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
1163
  "language_model.model.layers.38.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
1164
  "language_model.model.layers.38.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
1165
- "language_model.model.layers.38.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
1166
- "language_model.model.layers.38.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
1167
- "language_model.model.layers.38.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
1168
  "language_model.model.layers.39.input_layernorm.weight": "model-00001-of-00002.safetensors",
1169
  "language_model.model.layers.39.layer_scalar": "model-00001-of-00002.safetensors",
1170
  "language_model.model.layers.39.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
@@ -1186,10 +1081,6 @@
1186
  "language_model.model.layers.39.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1187
  "language_model.model.layers.39.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
1188
  "language_model.model.layers.39.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1189
- "language_model.model.layers.39.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1190
- "language_model.model.layers.39.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
1191
- "language_model.model.layers.39.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
1192
- "language_model.model.layers.39.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
1193
  "language_model.model.layers.39.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
1194
  "language_model.model.layers.39.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
1195
  "language_model.model.layers.39.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
@@ -1197,9 +1088,6 @@
1197
  "language_model.model.layers.39.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
1198
  "language_model.model.layers.39.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
1199
  "language_model.model.layers.39.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
1200
- "language_model.model.layers.39.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
1201
- "language_model.model.layers.39.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
1202
- "language_model.model.layers.39.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
1203
  "language_model.model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
1204
  "language_model.model.layers.4.layer_scalar": "model-00001-of-00002.safetensors",
1205
  "language_model.model.layers.4.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
@@ -1256,10 +1144,6 @@
1256
  "language_model.model.layers.40.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1257
  "language_model.model.layers.40.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
1258
  "language_model.model.layers.40.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1259
- "language_model.model.layers.40.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1260
- "language_model.model.layers.40.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
1261
- "language_model.model.layers.40.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
1262
- "language_model.model.layers.40.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
1263
  "language_model.model.layers.40.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
1264
  "language_model.model.layers.40.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
1265
  "language_model.model.layers.40.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
@@ -1267,9 +1151,6 @@
1267
  "language_model.model.layers.40.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
1268
  "language_model.model.layers.40.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
1269
  "language_model.model.layers.40.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
1270
- "language_model.model.layers.40.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
1271
- "language_model.model.layers.40.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
1272
- "language_model.model.layers.40.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
1273
  "language_model.model.layers.41.input_layernorm.weight": "model-00001-of-00002.safetensors",
1274
  "language_model.model.layers.41.layer_scalar": "model-00001-of-00002.safetensors",
1275
  "language_model.model.layers.41.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
@@ -1291,10 +1172,6 @@
1291
  "language_model.model.layers.41.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1292
  "language_model.model.layers.41.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
1293
  "language_model.model.layers.41.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1294
- "language_model.model.layers.41.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
1295
- "language_model.model.layers.41.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
1296
- "language_model.model.layers.41.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
1297
- "language_model.model.layers.41.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
1298
  "language_model.model.layers.41.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
1299
  "language_model.model.layers.41.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
1300
  "language_model.model.layers.41.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
@@ -1302,9 +1179,6 @@
1302
  "language_model.model.layers.41.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
1303
  "language_model.model.layers.41.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
1304
  "language_model.model.layers.41.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
1305
- "language_model.model.layers.41.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
1306
- "language_model.model.layers.41.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
1307
- "language_model.model.layers.41.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
1308
  "language_model.model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
1309
  "language_model.model.layers.5.layer_scalar": "model-00001-of-00002.safetensors",
1310
  "language_model.model.layers.5.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 6064363604,
4
+ "total_parameters": 7463013376
5
  },
6
  "weight_map": {
7
  "language_model.model.embed_tokens.biases": "model-00001-of-00002.safetensors",
 
626
  "language_model.model.layers.24.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
627
  "language_model.model.layers.24.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
628
  "language_model.model.layers.24.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
 
 
 
 
629
  "language_model.model.layers.24.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
630
  "language_model.model.layers.24.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
631
  "language_model.model.layers.24.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
 
633
  "language_model.model.layers.24.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
634
  "language_model.model.layers.24.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
635
  "language_model.model.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
 
 
 
636
  "language_model.model.layers.25.input_layernorm.weight": "model-00001-of-00002.safetensors",
637
  "language_model.model.layers.25.layer_scalar": "model-00001-of-00002.safetensors",
638
  "language_model.model.layers.25.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
 
654
  "language_model.model.layers.25.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
655
  "language_model.model.layers.25.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
656
  "language_model.model.layers.25.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
 
 
 
 
657
  "language_model.model.layers.25.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
658
  "language_model.model.layers.25.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
659
  "language_model.model.layers.25.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
 
661
  "language_model.model.layers.25.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
662
  "language_model.model.layers.25.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
663
  "language_model.model.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
 
 
 
664
  "language_model.model.layers.26.input_layernorm.weight": "model-00001-of-00002.safetensors",
665
  "language_model.model.layers.26.layer_scalar": "model-00001-of-00002.safetensors",
666
  "language_model.model.layers.26.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
 
682
  "language_model.model.layers.26.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
683
  "language_model.model.layers.26.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
684
  "language_model.model.layers.26.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
 
 
 
 
685
  "language_model.model.layers.26.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
686
  "language_model.model.layers.26.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
687
  "language_model.model.layers.26.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
 
689
  "language_model.model.layers.26.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
690
  "language_model.model.layers.26.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
691
  "language_model.model.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
 
 
 
692
  "language_model.model.layers.27.input_layernorm.weight": "model-00001-of-00002.safetensors",
693
  "language_model.model.layers.27.layer_scalar": "model-00001-of-00002.safetensors",
694
  "language_model.model.layers.27.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
 
710
  "language_model.model.layers.27.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
711
  "language_model.model.layers.27.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
712
  "language_model.model.layers.27.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
 
 
 
 
713
  "language_model.model.layers.27.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
714
  "language_model.model.layers.27.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
715
  "language_model.model.layers.27.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
 
717
  "language_model.model.layers.27.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
718
  "language_model.model.layers.27.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
719
  "language_model.model.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
 
 
 
720
  "language_model.model.layers.28.input_layernorm.weight": "model-00001-of-00002.safetensors",
721
  "language_model.model.layers.28.layer_scalar": "model-00001-of-00002.safetensors",
722
  "language_model.model.layers.28.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
 
738
  "language_model.model.layers.28.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
739
  "language_model.model.layers.28.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
740
  "language_model.model.layers.28.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
 
 
 
 
741
  "language_model.model.layers.28.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
742
  "language_model.model.layers.28.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
743
  "language_model.model.layers.28.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
 
745
  "language_model.model.layers.28.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
746
  "language_model.model.layers.28.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
747
  "language_model.model.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
 
 
 
748
  "language_model.model.layers.29.input_layernorm.weight": "model-00001-of-00002.safetensors",
749
  "language_model.model.layers.29.layer_scalar": "model-00001-of-00002.safetensors",
750
  "language_model.model.layers.29.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
 
766
  "language_model.model.layers.29.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
767
  "language_model.model.layers.29.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
768
  "language_model.model.layers.29.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
 
 
 
 
769
  "language_model.model.layers.29.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
770
  "language_model.model.layers.29.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
771
  "language_model.model.layers.29.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
 
773
  "language_model.model.layers.29.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
774
  "language_model.model.layers.29.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
775
  "language_model.model.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
 
 
 
776
  "language_model.model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
777
  "language_model.model.layers.3.layer_scalar": "model-00001-of-00002.safetensors",
778
  "language_model.model.layers.3.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
 
829
  "language_model.model.layers.30.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
830
  "language_model.model.layers.30.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
831
  "language_model.model.layers.30.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
 
 
 
 
832
  "language_model.model.layers.30.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
833
  "language_model.model.layers.30.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
834
  "language_model.model.layers.30.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
 
836
  "language_model.model.layers.30.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
837
  "language_model.model.layers.30.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
838
  "language_model.model.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
 
 
 
839
  "language_model.model.layers.31.input_layernorm.weight": "model-00001-of-00002.safetensors",
840
  "language_model.model.layers.31.layer_scalar": "model-00001-of-00002.safetensors",
841
  "language_model.model.layers.31.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
 
857
  "language_model.model.layers.31.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
858
  "language_model.model.layers.31.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
859
  "language_model.model.layers.31.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
 
 
 
 
860
  "language_model.model.layers.31.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
861
  "language_model.model.layers.31.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
862
  "language_model.model.layers.31.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
 
864
  "language_model.model.layers.31.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
865
  "language_model.model.layers.31.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
866
  "language_model.model.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
 
 
 
867
  "language_model.model.layers.32.input_layernorm.weight": "model-00001-of-00002.safetensors",
868
  "language_model.model.layers.32.layer_scalar": "model-00001-of-00002.safetensors",
869
  "language_model.model.layers.32.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
 
885
  "language_model.model.layers.32.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
886
  "language_model.model.layers.32.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
887
  "language_model.model.layers.32.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
 
 
 
 
888
  "language_model.model.layers.32.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
889
  "language_model.model.layers.32.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
890
  "language_model.model.layers.32.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
 
892
  "language_model.model.layers.32.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
893
  "language_model.model.layers.32.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
894
  "language_model.model.layers.32.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
 
 
 
895
  "language_model.model.layers.33.input_layernorm.weight": "model-00001-of-00002.safetensors",
896
  "language_model.model.layers.33.layer_scalar": "model-00001-of-00002.safetensors",
897
  "language_model.model.layers.33.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
 
913
  "language_model.model.layers.33.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
914
  "language_model.model.layers.33.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
915
  "language_model.model.layers.33.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
 
 
 
 
916
  "language_model.model.layers.33.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
917
  "language_model.model.layers.33.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
918
  "language_model.model.layers.33.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
 
920
  "language_model.model.layers.33.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
921
  "language_model.model.layers.33.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
922
  "language_model.model.layers.33.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
 
 
 
923
  "language_model.model.layers.34.input_layernorm.weight": "model-00001-of-00002.safetensors",
924
  "language_model.model.layers.34.layer_scalar": "model-00001-of-00002.safetensors",
925
  "language_model.model.layers.34.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
 
941
  "language_model.model.layers.34.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
942
  "language_model.model.layers.34.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
943
  "language_model.model.layers.34.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
 
 
 
 
944
  "language_model.model.layers.34.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
945
  "language_model.model.layers.34.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
946
  "language_model.model.layers.34.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
 
948
  "language_model.model.layers.34.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
949
  "language_model.model.layers.34.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
950
  "language_model.model.layers.34.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
 
 
 
951
  "language_model.model.layers.35.input_layernorm.weight": "model-00001-of-00002.safetensors",
952
  "language_model.model.layers.35.layer_scalar": "model-00001-of-00002.safetensors",
953
  "language_model.model.layers.35.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
 
969
  "language_model.model.layers.35.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
970
  "language_model.model.layers.35.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
971
  "language_model.model.layers.35.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
 
 
 
 
972
  "language_model.model.layers.35.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
973
  "language_model.model.layers.35.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
974
  "language_model.model.layers.35.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
 
976
  "language_model.model.layers.35.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
977
  "language_model.model.layers.35.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
978
  "language_model.model.layers.35.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
 
 
 
979
  "language_model.model.layers.36.input_layernorm.weight": "model-00001-of-00002.safetensors",
980
  "language_model.model.layers.36.layer_scalar": "model-00001-of-00002.safetensors",
981
  "language_model.model.layers.36.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
 
997
  "language_model.model.layers.36.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
998
  "language_model.model.layers.36.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
999
  "language_model.model.layers.36.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
 
 
 
 
1000
  "language_model.model.layers.36.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
1001
  "language_model.model.layers.36.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
1002
  "language_model.model.layers.36.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
 
1004
  "language_model.model.layers.36.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
1005
  "language_model.model.layers.36.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
1006
  "language_model.model.layers.36.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
 
 
 
1007
  "language_model.model.layers.37.input_layernorm.weight": "model-00001-of-00002.safetensors",
1008
  "language_model.model.layers.37.layer_scalar": "model-00001-of-00002.safetensors",
1009
  "language_model.model.layers.37.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
 
1025
  "language_model.model.layers.37.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1026
  "language_model.model.layers.37.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
1027
  "language_model.model.layers.37.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
 
 
 
 
1028
  "language_model.model.layers.37.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
1029
  "language_model.model.layers.37.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
1030
  "language_model.model.layers.37.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
 
1032
  "language_model.model.layers.37.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
1033
  "language_model.model.layers.37.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
1034
  "language_model.model.layers.37.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
 
 
 
1035
  "language_model.model.layers.38.input_layernorm.weight": "model-00001-of-00002.safetensors",
1036
  "language_model.model.layers.38.layer_scalar": "model-00001-of-00002.safetensors",
1037
  "language_model.model.layers.38.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
 
1053
  "language_model.model.layers.38.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1054
  "language_model.model.layers.38.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
1055
  "language_model.model.layers.38.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
 
 
 
 
1056
  "language_model.model.layers.38.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
1057
  "language_model.model.layers.38.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
1058
  "language_model.model.layers.38.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
 
1060
  "language_model.model.layers.38.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
1061
  "language_model.model.layers.38.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
1062
  "language_model.model.layers.38.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
 
 
 
1063
  "language_model.model.layers.39.input_layernorm.weight": "model-00001-of-00002.safetensors",
1064
  "language_model.model.layers.39.layer_scalar": "model-00001-of-00002.safetensors",
1065
  "language_model.model.layers.39.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
 
1081
  "language_model.model.layers.39.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1082
  "language_model.model.layers.39.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
1083
  "language_model.model.layers.39.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
 
 
 
 
1084
  "language_model.model.layers.39.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
1085
  "language_model.model.layers.39.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
1086
  "language_model.model.layers.39.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
 
1088
  "language_model.model.layers.39.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
1089
  "language_model.model.layers.39.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
1090
  "language_model.model.layers.39.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
 
 
 
1091
  "language_model.model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
1092
  "language_model.model.layers.4.layer_scalar": "model-00001-of-00002.safetensors",
1093
  "language_model.model.layers.4.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
 
1144
  "language_model.model.layers.40.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1145
  "language_model.model.layers.40.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
1146
  "language_model.model.layers.40.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
 
 
 
 
1147
  "language_model.model.layers.40.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
1148
  "language_model.model.layers.40.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
1149
  "language_model.model.layers.40.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
 
1151
  "language_model.model.layers.40.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
1152
  "language_model.model.layers.40.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
1153
  "language_model.model.layers.40.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
 
 
 
1154
  "language_model.model.layers.41.input_layernorm.weight": "model-00001-of-00002.safetensors",
1155
  "language_model.model.layers.41.layer_scalar": "model-00001-of-00002.safetensors",
1156
  "language_model.model.layers.41.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
 
1172
  "language_model.model.layers.41.post_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
1173
  "language_model.model.layers.41.post_per_layer_input_norm.weight": "model-00001-of-00002.safetensors",
1174
  "language_model.model.layers.41.pre_feedforward_layernorm.weight": "model-00001-of-00002.safetensors",
 
 
 
 
1175
  "language_model.model.layers.41.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
1176
  "language_model.model.layers.41.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
1177
  "language_model.model.layers.41.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
 
1179
  "language_model.model.layers.41.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
1180
  "language_model.model.layers.41.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
1181
  "language_model.model.layers.41.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
 
 
 
1182
  "language_model.model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
1183
  "language_model.model.layers.5.layer_scalar": "model-00001-of-00002.safetensors",
1184
  "language_model.model.layers.5.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
tokenizer_config.json CHANGED
@@ -18,6 +18,7 @@
18
  ],
19
  "image_token": "<|image|>",
20
  "is_local": true,
 
21
  "mask_token": "<mask>",
22
  "model_max_length": 1000000000000000019884624838656,
23
  "model_specific_special_tokens": {
 
18
  ],
19
  "image_token": "<|image|>",
20
  "is_local": true,
21
+ "local_files_only": false,
22
  "mask_token": "<mask>",
23
  "model_max_length": 1000000000000000019884624838656,
24
  "model_specific_special_tokens": {