broadfield-dev committed on
Commit
e7607cc
·
verified ·
1 Parent(s): 852df2f

Merged (Healed) Model from CPU Slicing+Stitch+LoRA

Browse files
config.json CHANGED
@@ -15,9 +15,9 @@
15
  "intermediate_size": 16384,
16
  "max_position_embeddings": 8192,
17
  "model_type": "gemma",
18
- "num_attention_heads": 10,
19
  "num_hidden_layers": 8,
20
- "num_key_value_heads": 10,
21
  "pad_token_id": 0,
22
  "rms_norm_eps": 1e-06,
23
  "rope_theta": 10000.0,
 
15
  "intermediate_size": 16384,
16
  "max_position_embeddings": 8192,
17
  "model_type": "gemma",
18
+ "num_attention_heads": 8,
19
  "num_hidden_layers": 8,
20
+ "num_key_value_heads": 8,
21
  "pad_token_id": 0,
22
  "rms_norm_eps": 1e-06,
23
  "rope_theta": 10000.0,
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:350b34ea799fbfb5de7c983810f34e20970374bbd88b6a5dc286c4499cc2f394
3
- size 4882257768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d90225c2487447c4472adaed2fed2968f063b1646f8ed60cdf7ca1dc7483736
3
+ size 4982938256
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b5ce83bba37f36d9d38669a6ec55bdce5175474163efe190f97003f64b440a3
3
- size 1107356096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cc8bc234e3df7cd20f4d934b8d155eab13588f2d53a2c49f2e9977ba7b1e9b1
3
+ size 872457880
model.safetensors.index.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "metadata": {
3
- "total_parameters": 1497401344,
4
- "total_size": 5989605376
5
  },
6
  "weight_map": {
7
  "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
@@ -50,11 +50,11 @@
50
  "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
51
  "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
52
  "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
53
- "model.layers.5.input_layernorm.weight": "model-00002-of-00002.safetensors",
54
- "model.layers.5.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
55
  "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
56
  "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
57
- "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
58
  "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
59
  "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
60
  "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
@@ -64,10 +64,10 @@
64
  "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
65
  "model.layers.6.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
66
  "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
67
- "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
68
- "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
69
- "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
70
- "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
71
  "model.layers.7.input_layernorm.weight": "model-00002-of-00002.safetensors",
72
  "model.layers.7.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
73
  "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_parameters": 1463846912,
4
+ "total_size": 5855387648
5
  },
6
  "weight_map": {
7
  "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
 
50
  "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
51
  "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
52
  "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
53
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
54
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
55
  "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
56
  "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
57
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
58
  "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
59
  "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
60
  "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
 
64
  "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
65
  "model.layers.6.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
66
  "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
67
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
68
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
69
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
70
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
71
  "model.layers.7.input_layernorm.weight": "model-00002-of-00002.safetensors",
72
  "model.layers.7.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
73
  "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",