dealignai committed on
Commit
6a0a251
·
verified ·
1 Parent(s): a307060

v2 surgery log

Browse files
Files changed (1) hide show
  1. crack_surgery_log.json +51 -111
crack_surgery_log.json CHANGED
@@ -3,62 +3,32 @@
3
  "direction_layer": 42,
4
  "per_layer": true,
5
  "target_layers": [
6
- 5,
7
- 12,
8
- 19,
9
  26,
10
  33,
11
  42
12
  ],
13
  "mamba_layers": [
14
- 4,
15
- 11,
16
- 18,
17
- 25,
18
  32,
19
- 41
 
 
 
 
 
20
  ],
21
- "strength": 12.0,
22
- "mamba_strength": 7.0,
23
- "ablation": "standard",
24
  "mode": "affine",
25
  "bits": 4,
26
  "group_size": 32,
27
  "results": [
28
- {
29
- "layer": 5,
30
- "delta_norm": 17.83401107788086,
31
- "change_pct": 31.373558140931667,
32
- "shards": [
33
- "model-00001-of-00020.safetensors"
34
- ],
35
- "kind": "attn",
36
- "tensor": "o_proj"
37
- },
38
- {
39
- "layer": 12,
40
- "delta_norm": 23.010862350463867,
41
- "change_pct": 34.08291636730016,
42
- "shards": [
43
- "model-00001-of-00020.safetensors"
44
- ],
45
- "kind": "attn",
46
- "tensor": "o_proj"
47
- },
48
- {
49
- "layer": 19,
50
- "delta_norm": 20.308698654174805,
51
- "change_pct": 27.78214477306932,
52
- "shards": [
53
- "model-00001-of-00020.safetensors"
54
- ],
55
- "kind": "attn",
56
- "tensor": "o_proj"
57
- },
58
  {
59
  "layer": 26,
60
- "delta_norm": 31.625486373901367,
61
- "change_pct": 39.71396844336149,
62
  "shards": [
63
  "model-00001-of-00020.safetensors"
64
  ],
@@ -67,8 +37,8 @@
67
  },
68
  {
69
  "layer": 33,
70
- "delta_norm": 30.05658531188965,
71
- "change_pct": 34.12074176761659,
72
  "shards": [
73
  "model-00001-of-00020.safetensors"
74
  ],
@@ -77,8 +47,8 @@
77
  },
78
  {
79
  "layer": 42,
80
- "delta_norm": 21.988754272460938,
81
- "change_pct": 22.68252935020654,
82
  "shards": [
83
  "model-00001-of-00020.safetensors"
84
  ],
@@ -86,9 +56,9 @@
86
  "tensor": "o_proj"
87
  },
88
  {
89
- "layer": 4,
90
- "delta_norm": 12.33937931060791,
91
- "change_pct": 15.995633397450485,
92
  "shards": [
93
  "model-00001-of-00020.safetensors"
94
  ],
@@ -96,9 +66,9 @@
96
  "tensor": "out_proj"
97
  },
98
  {
99
- "layer": 11,
100
- "delta_norm": 13.508964538574219,
101
- "change_pct": 17.711041219738814,
102
  "shards": [
103
  "model-00001-of-00020.safetensors"
104
  ],
@@ -106,9 +76,9 @@
106
  "tensor": "out_proj"
107
  },
108
  {
109
- "layer": 18,
110
- "delta_norm": 12.32869815826416,
111
- "change_pct": 16.558263786691434,
112
  "shards": [
113
  "model-00001-of-00020.safetensors"
114
  ],
@@ -116,9 +86,9 @@
116
  "tensor": "out_proj"
117
  },
118
  {
119
- "layer": 25,
120
- "delta_norm": 12.160455703735352,
121
- "change_pct": 16.252346497511514,
122
  "shards": [
123
  "model-00001-of-00020.safetensors"
124
  ],
@@ -126,9 +96,9 @@
126
  "tensor": "out_proj"
127
  },
128
  {
129
- "layer": 32,
130
- "delta_norm": 12.021119117736816,
131
- "change_pct": 16.06200039185752,
132
  "shards": [
133
  "model-00001-of-00020.safetensors"
134
  ],
@@ -136,9 +106,9 @@
136
  "tensor": "out_proj"
137
  },
138
  {
139
- "layer": 41,
140
- "delta_norm": 11.181370735168457,
141
- "change_pct": 14.753241895789024,
142
  "shards": [
143
  "model-00001-of-00020.safetensors"
144
  ],
@@ -146,64 +116,34 @@
146
  "tensor": "out_proj"
147
  },
148
  {
149
- "layer": 6,
150
- "delta_norm": 8.025799751281738,
151
- "change_pct": 14.542831747174574,
152
- "shards": [
153
- "model-00001-of-00020.safetensors"
154
- ],
155
- "kind": "shared",
156
- "tensor": "shared_experts.down_proj"
157
- },
158
- {
159
- "layer": 13,
160
- "delta_norm": 8.742560386657715,
161
- "change_pct": 16.381933673250934,
162
- "shards": [
163
- "model-00001-of-00020.safetensors"
164
- ],
165
- "kind": "shared",
166
- "tensor": "shared_experts.down_proj"
167
- },
168
- {
169
- "layer": 20,
170
- "delta_norm": 8.3919038772583,
171
- "change_pct": 16.503345812252668,
172
- "shards": [
173
- "model-00001-of-00020.safetensors"
174
- ],
175
- "kind": "shared",
176
- "tensor": "shared_experts.down_proj"
177
- },
178
- {
179
- "layer": 27,
180
- "delta_norm": 9.08780574798584,
181
- "change_pct": 16.93895002437764,
182
  "shards": [
183
  "model-00001-of-00020.safetensors"
184
  ],
185
- "kind": "shared",
186
- "tensor": "shared_experts.down_proj"
187
  },
188
  {
189
- "layer": 34,
190
- "delta_norm": 8.4222412109375,
191
- "change_pct": 15.360079432949368,
192
  "shards": [
193
- "model-00001-of-00020.safetensors"
194
  ],
195
- "kind": "shared",
196
- "tensor": "shared_experts.down_proj"
197
  },
198
  {
199
- "layer": 43,
200
- "delta_norm": 7.951038360595703,
201
- "change_pct": 14.324345257407614,
202
  "shards": [
203
  "model-00002-of-00020.safetensors"
204
  ],
205
- "kind": "shared",
206
- "tensor": "shared_experts.down_proj"
207
  }
208
  ]
209
  }
 
3
  "direction_layer": 42,
4
  "per_layer": true,
5
  "target_layers": [
 
 
 
6
  26,
7
  33,
8
  42
9
  ],
10
  "mamba_layers": [
11
+ 28,
12
+ 30,
 
 
13
  32,
14
+ 35,
15
+ 37,
16
+ 39,
17
+ 41,
18
+ 44,
19
+ 46
20
  ],
21
+ "strength": 8.0,
22
+ "mamba_strength": 5.0,
23
+ "ablation": "mpoa",
24
  "mode": "affine",
25
  "bits": 4,
26
  "group_size": 32,
27
  "results": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  {
29
  "layer": 26,
30
+ "delta_norm": 19.6954288482666,
31
+ "change_pct": 24.732699143688773,
32
  "shards": [
33
  "model-00001-of-00020.safetensors"
34
  ],
 
37
  },
38
  {
39
  "layer": 33,
40
+ "delta_norm": 18.917333602905273,
41
+ "change_pct": 21.475275654192608,
42
  "shards": [
43
  "model-00001-of-00020.safetensors"
44
  ],
 
47
  },
48
  {
49
  "layer": 42,
50
+ "delta_norm": 14.288287162780762,
51
+ "change_pct": 14.739102039075265,
52
  "shards": [
53
  "model-00001-of-00020.safetensors"
54
  ],
 
56
  "tensor": "o_proj"
57
  },
58
  {
59
+ "layer": 28,
60
+ "delta_norm": 9.896612167358398,
61
+ "change_pct": 13.162798166580583,
62
  "shards": [
63
  "model-00001-of-00020.safetensors"
64
  ],
 
66
  "tensor": "out_proj"
67
  },
68
  {
69
+ "layer": 30,
70
+ "delta_norm": 8.51060962677002,
71
+ "change_pct": 11.320950630825633,
72
  "shards": [
73
  "model-00001-of-00020.safetensors"
74
  ],
 
76
  "tensor": "out_proj"
77
  },
78
  {
79
+ "layer": 32,
80
+ "delta_norm": 8.511087417602539,
81
+ "change_pct": 11.372076767375273,
82
  "shards": [
83
  "model-00001-of-00020.safetensors"
84
  ],
 
86
  "tensor": "out_proj"
87
  },
88
  {
89
+ "layer": 35,
90
+ "delta_norm": 8.443239212036133,
91
+ "change_pct": 11.194377584788132,
92
  "shards": [
93
  "model-00001-of-00020.safetensors"
94
  ],
 
96
  "tensor": "out_proj"
97
  },
98
  {
99
+ "layer": 37,
100
+ "delta_norm": 7.73286771774292,
101
+ "change_pct": 10.304361383970939,
102
  "shards": [
103
  "model-00001-of-00020.safetensors"
104
  ],
 
106
  "tensor": "out_proj"
107
  },
108
  {
109
+ "layer": 39,
110
+ "delta_norm": 7.7536301612854,
111
+ "change_pct": 10.372444483114784,
112
  "shards": [
113
  "model-00001-of-00020.safetensors"
114
  ],
 
116
  "tensor": "out_proj"
117
  },
118
  {
119
+ "layer": 41,
120
+ "delta_norm": 7.9247965812683105,
121
+ "change_pct": 10.45636028958768,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  "shards": [
123
  "model-00001-of-00020.safetensors"
124
  ],
125
+ "kind": "mamba",
126
+ "tensor": "out_proj"
127
  },
128
  {
129
+ "layer": 44,
130
+ "delta_norm": 7.85770320892334,
131
+ "change_pct": 10.502834646504375,
132
  "shards": [
133
+ "model-00002-of-00020.safetensors"
134
  ],
135
+ "kind": "mamba",
136
+ "tensor": "out_proj"
137
  },
138
  {
139
+ "layer": 46,
140
+ "delta_norm": 8.16724967956543,
141
+ "change_pct": 11.024374577831475,
142
  "shards": [
143
  "model-00002-of-00020.safetensors"
144
  ],
145
+ "kind": "mamba",
146
+ "tensor": "out_proj"
147
  }
148
  ]
149
  }