nahiar commited on
Commit
cd6b354
·
verified ·
1 Parent(s): 29b64b2

Initial upload (auto-create if missing)

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "XLMRobertaForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "dtype": "float32",
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 514,
17
+ "model_type": "xlm-roberta",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "output_past": true,
21
+ "pad_token_id": 1,
22
+ "position_embedding_type": "absolute",
23
+ "problem_type": "multi_label_classification",
24
+ "transformers_version": "4.57.3",
25
+ "type_vocab_size": 1,
26
+ "use_cache": true,
27
+ "vocab_size": 250002
28
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e5aee891b408de34a5105330b07df46b21804824f758fbada7821122e1a9fc0
3
+ size 1112205008
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ebc7636c9e4f553cb2aa1694c402c057929b915da5e711164ce31ab7ea97c84
3
+ size 2224532875
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1943a36cf77062341254c219efd6831e4d9a808dd2d7e2b50b04a148d555eea6
3
+ size 14645
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f723b5b260f4334e9aaa21475897e4a5db6bdba3a57a7df3baefee61216a260b
3
+ size 1465
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ffb37461c391f096759f4a9bbbc329da0f36952f88bab061fcf84940c022e98
3
+ size 17082999
tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "extra_special_tokens": {},
49
+ "mask_token": "<mask>",
50
+ "model_max_length": 512,
51
+ "pad_token": "<pad>",
52
+ "sep_token": "</s>",
53
+ "tokenizer_class": "XLMRobertaTokenizer",
54
+ "unk_token": "<unk>"
55
+ }
trainer_state.json ADDED
@@ -0,0 +1,462 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 2838,
3
+ "best_metric": 0.9269727168763274,
4
+ "best_model_checkpoint": "./results/checkpoint-2838",
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 2838,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.052854122621564484,
14
+ "grad_norm": 7.628295421600342,
15
+ "learning_rate": 1.9654686398872448e-05,
16
+ "loss": 0.3925,
17
+ "step": 50
18
+ },
19
+ {
20
+ "epoch": 0.10570824524312897,
21
+ "grad_norm": 12.30865478515625,
22
+ "learning_rate": 1.9302325581395353e-05,
23
+ "loss": 0.3431,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 0.15856236786469344,
28
+ "grad_norm": 14.237386703491211,
29
+ "learning_rate": 1.8949964763918254e-05,
30
+ "loss": 0.3237,
31
+ "step": 150
32
+ },
33
+ {
34
+ "epoch": 0.21141649048625794,
35
+ "grad_norm": 28.253515243530273,
36
+ "learning_rate": 1.8597603946441155e-05,
37
+ "loss": 0.3131,
38
+ "step": 200
39
+ },
40
+ {
41
+ "epoch": 0.2642706131078224,
42
+ "grad_norm": 7.577988624572754,
43
+ "learning_rate": 1.824524312896406e-05,
44
+ "loss": 0.3027,
45
+ "step": 250
46
+ },
47
+ {
48
+ "epoch": 0.3171247357293869,
49
+ "grad_norm": 12.410346031188965,
50
+ "learning_rate": 1.7892882311486964e-05,
51
+ "loss": 0.2873,
52
+ "step": 300
53
+ },
54
+ {
55
+ "epoch": 0.3699788583509514,
56
+ "grad_norm": 17.205244064331055,
57
+ "learning_rate": 1.754052149400987e-05,
58
+ "loss": 0.3037,
59
+ "step": 350
60
+ },
61
+ {
62
+ "epoch": 0.42283298097251587,
63
+ "grad_norm": 15.440715789794922,
64
+ "learning_rate": 1.718816067653277e-05,
65
+ "loss": 0.2388,
66
+ "step": 400
67
+ },
68
+ {
69
+ "epoch": 0.47568710359408034,
70
+ "grad_norm": 18.95528793334961,
71
+ "learning_rate": 1.6835799859055675e-05,
72
+ "loss": 0.2834,
73
+ "step": 450
74
+ },
75
+ {
76
+ "epoch": 0.5285412262156448,
77
+ "grad_norm": 11.796369552612305,
78
+ "learning_rate": 1.6483439041578576e-05,
79
+ "loss": 0.282,
80
+ "step": 500
81
+ },
82
+ {
83
+ "epoch": 0.5813953488372093,
84
+ "grad_norm": 16.309091567993164,
85
+ "learning_rate": 1.613107822410148e-05,
86
+ "loss": 0.2861,
87
+ "step": 550
88
+ },
89
+ {
90
+ "epoch": 0.6342494714587738,
91
+ "grad_norm": 34.3432731628418,
92
+ "learning_rate": 1.5778717406624385e-05,
93
+ "loss": 0.295,
94
+ "step": 600
95
+ },
96
+ {
97
+ "epoch": 0.6871035940803383,
98
+ "grad_norm": 7.3598151206970215,
99
+ "learning_rate": 1.542635658914729e-05,
100
+ "loss": 0.2786,
101
+ "step": 650
102
+ },
103
+ {
104
+ "epoch": 0.7399577167019028,
105
+ "grad_norm": 19.721378326416016,
106
+ "learning_rate": 1.5073995771670191e-05,
107
+ "loss": 0.2627,
108
+ "step": 700
109
+ },
110
+ {
111
+ "epoch": 0.7928118393234672,
112
+ "grad_norm": 6.87978982925415,
113
+ "learning_rate": 1.4721634954193096e-05,
114
+ "loss": 0.274,
115
+ "step": 750
116
+ },
117
+ {
118
+ "epoch": 0.8456659619450317,
119
+ "grad_norm": 23.790388107299805,
120
+ "learning_rate": 1.4369274136715999e-05,
121
+ "loss": 0.2477,
122
+ "step": 800
123
+ },
124
+ {
125
+ "epoch": 0.8985200845665962,
126
+ "grad_norm": 25.269287109375,
127
+ "learning_rate": 1.4016913319238903e-05,
128
+ "loss": 0.2559,
129
+ "step": 850
130
+ },
131
+ {
132
+ "epoch": 0.9513742071881607,
133
+ "grad_norm": 12.499388694763184,
134
+ "learning_rate": 1.3664552501761804e-05,
135
+ "loss": 0.2535,
136
+ "step": 900
137
+ },
138
+ {
139
+ "epoch": 1.0,
140
+ "eval_f1_macro": 0.8913938479590982,
141
+ "eval_f1_micro": 0.8905597326649958,
142
+ "eval_loss": 0.23979686200618744,
143
+ "eval_precision": 0.9089358799454298,
144
+ "eval_recall": 0.872911889944317,
145
+ "eval_runtime": 10.313,
146
+ "eval_samples_per_second": 366.721,
147
+ "eval_steps_per_second": 22.981,
148
+ "step": 946
149
+ },
150
+ {
151
+ "epoch": 1.0042283298097252,
152
+ "grad_norm": 20.737754821777344,
153
+ "learning_rate": 1.3312191684284707e-05,
154
+ "loss": 0.2342,
155
+ "step": 950
156
+ },
157
+ {
158
+ "epoch": 1.0570824524312896,
159
+ "grad_norm": 8.743599891662598,
160
+ "learning_rate": 1.2959830866807612e-05,
161
+ "loss": 0.1932,
162
+ "step": 1000
163
+ },
164
+ {
165
+ "epoch": 1.109936575052854,
166
+ "grad_norm": 15.532188415527344,
167
+ "learning_rate": 1.2607470049330515e-05,
168
+ "loss": 0.1546,
169
+ "step": 1050
170
+ },
171
+ {
172
+ "epoch": 1.1627906976744187,
173
+ "grad_norm": 10.706155776977539,
174
+ "learning_rate": 1.225510923185342e-05,
175
+ "loss": 0.2058,
176
+ "step": 1100
177
+ },
178
+ {
179
+ "epoch": 1.215644820295983,
180
+ "grad_norm": 6.785822868347168,
181
+ "learning_rate": 1.1902748414376322e-05,
182
+ "loss": 0.1603,
183
+ "step": 1150
184
+ },
185
+ {
186
+ "epoch": 1.2684989429175475,
187
+ "grad_norm": 19.18027687072754,
188
+ "learning_rate": 1.1550387596899227e-05,
189
+ "loss": 0.17,
190
+ "step": 1200
191
+ },
192
+ {
193
+ "epoch": 1.3213530655391121,
194
+ "grad_norm": 5.942857265472412,
195
+ "learning_rate": 1.1198026779422128e-05,
196
+ "loss": 0.2369,
197
+ "step": 1250
198
+ },
199
+ {
200
+ "epoch": 1.3742071881606766,
201
+ "grad_norm": 3.4256839752197266,
202
+ "learning_rate": 1.0845665961945033e-05,
203
+ "loss": 0.1843,
204
+ "step": 1300
205
+ },
206
+ {
207
+ "epoch": 1.427061310782241,
208
+ "grad_norm": 21.69367218017578,
209
+ "learning_rate": 1.0493305144467936e-05,
210
+ "loss": 0.1798,
211
+ "step": 1350
212
+ },
213
+ {
214
+ "epoch": 1.4799154334038054,
215
+ "grad_norm": 12.499094009399414,
216
+ "learning_rate": 1.014094432699084e-05,
217
+ "loss": 0.2137,
218
+ "step": 1400
219
+ },
220
+ {
221
+ "epoch": 1.53276955602537,
222
+ "grad_norm": 9.75059700012207,
223
+ "learning_rate": 9.788583509513743e-06,
224
+ "loss": 0.1616,
225
+ "step": 1450
226
+ },
227
+ {
228
+ "epoch": 1.5856236786469344,
229
+ "grad_norm": 16.770835876464844,
230
+ "learning_rate": 9.436222692036646e-06,
231
+ "loss": 0.1711,
232
+ "step": 1500
233
+ },
234
+ {
235
+ "epoch": 1.638477801268499,
236
+ "grad_norm": 7.7109222412109375,
237
+ "learning_rate": 9.083861874559549e-06,
238
+ "loss": 0.1757,
239
+ "step": 1550
240
+ },
241
+ {
242
+ "epoch": 1.6913319238900635,
243
+ "grad_norm": 8.317294120788574,
244
+ "learning_rate": 8.731501057082454e-06,
245
+ "loss": 0.1886,
246
+ "step": 1600
247
+ },
248
+ {
249
+ "epoch": 1.744186046511628,
250
+ "grad_norm": 7.418989181518555,
251
+ "learning_rate": 8.379140239605357e-06,
252
+ "loss": 0.1743,
253
+ "step": 1650
254
+ },
255
+ {
256
+ "epoch": 1.7970401691331923,
257
+ "grad_norm": 15.134026527404785,
258
+ "learning_rate": 8.02677942212826e-06,
259
+ "loss": 0.1829,
260
+ "step": 1700
261
+ },
262
+ {
263
+ "epoch": 1.8498942917547567,
264
+ "grad_norm": 8.40051555633545,
265
+ "learning_rate": 7.674418604651164e-06,
266
+ "loss": 0.1516,
267
+ "step": 1750
268
+ },
269
+ {
270
+ "epoch": 1.9027484143763214,
271
+ "grad_norm": 42.32719039916992,
272
+ "learning_rate": 7.322057787174067e-06,
273
+ "loss": 0.1574,
274
+ "step": 1800
275
+ },
276
+ {
277
+ "epoch": 1.955602536997886,
278
+ "grad_norm": 7.198770046234131,
279
+ "learning_rate": 6.969696969696971e-06,
280
+ "loss": 0.1796,
281
+ "step": 1850
282
+ },
283
+ {
284
+ "epoch": 2.0,
285
+ "eval_f1_macro": 0.9171441852345779,
286
+ "eval_f1_micro": 0.9165967444202048,
287
+ "eval_loss": 0.207748144865036,
288
+ "eval_precision": 0.9397797660013765,
289
+ "eval_recall": 0.8945299705207992,
290
+ "eval_runtime": 10.3145,
291
+ "eval_samples_per_second": 366.667,
292
+ "eval_steps_per_second": 22.977,
293
+ "step": 1892
294
+ },
295
+ {
296
+ "epoch": 2.0084566596194504,
297
+ "grad_norm": 3.685957193374634,
298
+ "learning_rate": 6.6173361522198745e-06,
299
+ "loss": 0.1328,
300
+ "step": 1900
301
+ },
302
+ {
303
+ "epoch": 2.061310782241015,
304
+ "grad_norm": 12.144611358642578,
305
+ "learning_rate": 6.2649753347427766e-06,
306
+ "loss": 0.1201,
307
+ "step": 1950
308
+ },
309
+ {
310
+ "epoch": 2.1141649048625792,
311
+ "grad_norm": 26.261823654174805,
312
+ "learning_rate": 5.91261451726568e-06,
313
+ "loss": 0.1465,
314
+ "step": 2000
315
+ },
316
+ {
317
+ "epoch": 2.1670190274841437,
318
+ "grad_norm": 42.77655029296875,
319
+ "learning_rate": 5.560253699788583e-06,
320
+ "loss": 0.1231,
321
+ "step": 2050
322
+ },
323
+ {
324
+ "epoch": 2.219873150105708,
325
+ "grad_norm": 10.951128959655762,
326
+ "learning_rate": 5.207892882311487e-06,
327
+ "loss": 0.1367,
328
+ "step": 2100
329
+ },
330
+ {
331
+ "epoch": 2.2727272727272725,
332
+ "grad_norm": 12.861650466918945,
333
+ "learning_rate": 4.855532064834391e-06,
334
+ "loss": 0.1056,
335
+ "step": 2150
336
+ },
337
+ {
338
+ "epoch": 2.3255813953488373,
339
+ "grad_norm": 16.26862144470215,
340
+ "learning_rate": 4.5031712473572945e-06,
341
+ "loss": 0.1259,
342
+ "step": 2200
343
+ },
344
+ {
345
+ "epoch": 2.3784355179704018,
346
+ "grad_norm": 11.81278133392334,
347
+ "learning_rate": 4.150810429880197e-06,
348
+ "loss": 0.1214,
349
+ "step": 2250
350
+ },
351
+ {
352
+ "epoch": 2.431289640591966,
353
+ "grad_norm": 1.1252022981643677,
354
+ "learning_rate": 3.798449612403101e-06,
355
+ "loss": 0.1017,
356
+ "step": 2300
357
+ },
358
+ {
359
+ "epoch": 2.4841437632135306,
360
+ "grad_norm": 11.047788619995117,
361
+ "learning_rate": 3.4460887949260045e-06,
362
+ "loss": 0.1263,
363
+ "step": 2350
364
+ },
365
+ {
366
+ "epoch": 2.536997885835095,
367
+ "grad_norm": 16.96929359436035,
368
+ "learning_rate": 3.0937279774489083e-06,
369
+ "loss": 0.0986,
370
+ "step": 2400
371
+ },
372
+ {
373
+ "epoch": 2.58985200845666,
374
+ "grad_norm": 2.476245164871216,
375
+ "learning_rate": 2.741367159971811e-06,
376
+ "loss": 0.1105,
377
+ "step": 2450
378
+ },
379
+ {
380
+ "epoch": 2.6427061310782243,
381
+ "grad_norm": 5.029143810272217,
382
+ "learning_rate": 2.389006342494715e-06,
383
+ "loss": 0.1219,
384
+ "step": 2500
385
+ },
386
+ {
387
+ "epoch": 2.6955602536997887,
388
+ "grad_norm": 16.73661231994629,
389
+ "learning_rate": 2.0366455250176183e-06,
390
+ "loss": 0.1087,
391
+ "step": 2550
392
+ },
393
+ {
394
+ "epoch": 2.748414376321353,
395
+ "grad_norm": 1.3909024000167847,
396
+ "learning_rate": 1.6842847075405216e-06,
397
+ "loss": 0.1118,
398
+ "step": 2600
399
+ },
400
+ {
401
+ "epoch": 2.8012684989429175,
402
+ "grad_norm": 14.585844993591309,
403
+ "learning_rate": 1.3319238900634251e-06,
404
+ "loss": 0.1018,
405
+ "step": 2650
406
+ },
407
+ {
408
+ "epoch": 2.854122621564482,
409
+ "grad_norm": 24.876344680786133,
410
+ "learning_rate": 9.795630725863285e-07,
411
+ "loss": 0.1001,
412
+ "step": 2700
413
+ },
414
+ {
415
+ "epoch": 2.9069767441860463,
416
+ "grad_norm": 28.51801872253418,
417
+ "learning_rate": 6.272022551092319e-07,
418
+ "loss": 0.1004,
419
+ "step": 2750
420
+ },
421
+ {
422
+ "epoch": 2.9598308668076108,
423
+ "grad_norm": 2.746856689453125,
424
+ "learning_rate": 2.748414376321353e-07,
425
+ "loss": 0.1181,
426
+ "step": 2800
427
+ },
428
+ {
429
+ "epoch": 3.0,
430
+ "eval_f1_macro": 0.927417514244305,
431
+ "eval_f1_micro": 0.9269727168763274,
432
+ "eval_loss": 0.2070944607257843,
433
+ "eval_precision": 0.9247066492829205,
434
+ "eval_recall": 0.9292499181133311,
435
+ "eval_runtime": 10.2948,
436
+ "eval_samples_per_second": 367.371,
437
+ "eval_steps_per_second": 23.021,
438
+ "step": 2838
439
+ }
440
+ ],
441
+ "logging_steps": 50,
442
+ "max_steps": 2838,
443
+ "num_input_tokens_seen": 0,
444
+ "num_train_epochs": 3,
445
+ "save_steps": 500,
446
+ "stateful_callbacks": {
447
+ "TrainerControl": {
448
+ "args": {
449
+ "should_epoch_stop": false,
450
+ "should_evaluate": false,
451
+ "should_log": false,
452
+ "should_save": true,
453
+ "should_training_stop": true
454
+ },
455
+ "attributes": {}
456
+ }
457
+ },
458
+ "total_flos": 1.200759923346432e+16,
459
+ "train_batch_size": 16,
460
+ "trial_name": null,
461
+ "trial_params": null
462
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eb33328b3cfeaf6c9f43992d935e1c45dbddcace1650f3bb41661285972d35c
3
+ size 5777