llava-med-v1.5-mistral-7b-finetune-grounding-v4-lora-12-epochs_2a100_40g_20250803 / trainer_state.json
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 11.928792569659443, | |
| "eval_steps": 500, | |
| "global_step": 1932, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.006191950464396285, | |
| "grad_norm": 583.2729636731592, | |
| "learning_rate": 0.0, | |
| "loss": 16.1607, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.01238390092879257, | |
| "grad_norm": 510.1711028527366, | |
| "learning_rate": 1.724137931034483e-06, | |
| "loss": 15.6126, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.018575851393188854, | |
| "grad_norm": 432.5760222925412, | |
| "learning_rate": 3.448275862068966e-06, | |
| "loss": 15.1224, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.02476780185758514, | |
| "grad_norm": 227.61889624860476, | |
| "learning_rate": 5.172413793103448e-06, | |
| "loss": 13.282, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.030959752321981424, | |
| "grad_norm": 158.07575641446655, | |
| "learning_rate": 6.896551724137932e-06, | |
| "loss": 12.5074, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.03715170278637771, | |
| "grad_norm": 154.3189118557477, | |
| "learning_rate": 8.620689655172414e-06, | |
| "loss": 12.0762, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.043343653250773995, | |
| "grad_norm": 76.42575226139347, | |
| "learning_rate": 1.0344827586206897e-05, | |
| "loss": 10.4845, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.04953560371517028, | |
| "grad_norm": 74.16335719258294, | |
| "learning_rate": 1.206896551724138e-05, | |
| "loss": 9.0803, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.05572755417956656, | |
| "grad_norm": 74.93338825694016, | |
| "learning_rate": 1.3793103448275863e-05, | |
| "loss": 8.8984, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.06191950464396285, | |
| "grad_norm": 72.98575970833309, | |
| "learning_rate": 1.5517241379310346e-05, | |
| "loss": 8.4824, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.06811145510835913, | |
| "grad_norm": 60.95311182317466, | |
| "learning_rate": 1.7241379310344828e-05, | |
| "loss": 8.0288, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.07430340557275542, | |
| "grad_norm": 25.949986997829786, | |
| "learning_rate": 1.896551724137931e-05, | |
| "loss": 7.4985, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0804953560371517, | |
| "grad_norm": 48.812709487118596, | |
| "learning_rate": 2.0689655172413793e-05, | |
| "loss": 7.1892, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.08668730650154799, | |
| "grad_norm": 17.975953609206936, | |
| "learning_rate": 2.2413793103448276e-05, | |
| "loss": 7.2859, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.09287925696594428, | |
| "grad_norm": 13.444400027966196, | |
| "learning_rate": 2.413793103448276e-05, | |
| "loss": 6.8293, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.09907120743034056, | |
| "grad_norm": 16.906524456262332, | |
| "learning_rate": 2.5862068965517244e-05, | |
| "loss": 6.5689, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.10526315789473684, | |
| "grad_norm": 14.96615376509466, | |
| "learning_rate": 2.7586206896551727e-05, | |
| "loss": 6.477, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.11145510835913312, | |
| "grad_norm": 33.3089382115632, | |
| "learning_rate": 2.9310344827586206e-05, | |
| "loss": 6.4738, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.11764705882352941, | |
| "grad_norm": 132.30119742848726, | |
| "learning_rate": 3.103448275862069e-05, | |
| "loss": 6.1718, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.1238390092879257, | |
| "grad_norm": 14.885009490288567, | |
| "learning_rate": 3.275862068965517e-05, | |
| "loss": 5.9924, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.13003095975232198, | |
| "grad_norm": 14.20583957199301, | |
| "learning_rate": 3.4482758620689657e-05, | |
| "loss": 5.8076, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.13622291021671826, | |
| "grad_norm": 14.052226006442716, | |
| "learning_rate": 3.620689655172414e-05, | |
| "loss": 5.013, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.14241486068111456, | |
| "grad_norm": 13.91506279716363, | |
| "learning_rate": 3.793103448275862e-05, | |
| "loss": 5.3075, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.14860681114551083, | |
| "grad_norm": 12.62823248730767, | |
| "learning_rate": 3.965517241379311e-05, | |
| "loss": 5.2177, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.15479876160990713, | |
| "grad_norm": 19.04525031939675, | |
| "learning_rate": 4.1379310344827587e-05, | |
| "loss": 4.9121, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.1609907120743034, | |
| "grad_norm": 10.728631362957593, | |
| "learning_rate": 4.3103448275862066e-05, | |
| "loss": 4.6596, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.16718266253869968, | |
| "grad_norm": 13.956262307265314, | |
| "learning_rate": 4.482758620689655e-05, | |
| "loss": 4.7653, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.17337461300309598, | |
| "grad_norm": 10.083505632824219, | |
| "learning_rate": 4.655172413793104e-05, | |
| "loss": 4.4974, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.17956656346749225, | |
| "grad_norm": 9.936223155180423, | |
| "learning_rate": 4.827586206896552e-05, | |
| "loss": 4.7099, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.18575851393188855, | |
| "grad_norm": 13.494395241748343, | |
| "learning_rate": 5e-05, | |
| "loss": 4.4682, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.19195046439628483, | |
| "grad_norm": 9.751210714554487, | |
| "learning_rate": 5.172413793103449e-05, | |
| "loss": 4.6888, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.19814241486068113, | |
| "grad_norm": 8.53427701687458, | |
| "learning_rate": 5.344827586206896e-05, | |
| "loss": 4.3002, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.2043343653250774, | |
| "grad_norm": 18.845223437943833, | |
| "learning_rate": 5.517241379310345e-05, | |
| "loss": 4.5649, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.21052631578947367, | |
| "grad_norm": 14.037468185845768, | |
| "learning_rate": 5.689655172413794e-05, | |
| "loss": 4.1247, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.21671826625386997, | |
| "grad_norm": 10.461800393764017, | |
| "learning_rate": 5.862068965517241e-05, | |
| "loss": 4.173, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.22291021671826625, | |
| "grad_norm": 13.442613201138604, | |
| "learning_rate": 6.03448275862069e-05, | |
| "loss": 4.5809, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.22910216718266255, | |
| "grad_norm": 9.464361097873487, | |
| "learning_rate": 6.206896551724138e-05, | |
| "loss": 4.1074, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.23529411764705882, | |
| "grad_norm": 9.665048525162632, | |
| "learning_rate": 6.379310344827587e-05, | |
| "loss": 4.1199, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.24148606811145512, | |
| "grad_norm": 9.659469642364447, | |
| "learning_rate": 6.551724137931034e-05, | |
| "loss": 4.1702, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.2476780185758514, | |
| "grad_norm": 8.152823124328645, | |
| "learning_rate": 6.724137931034483e-05, | |
| "loss": 3.9314, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.25386996904024767, | |
| "grad_norm": 8.452741968127802, | |
| "learning_rate": 6.896551724137931e-05, | |
| "loss": 3.8782, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.26006191950464397, | |
| "grad_norm": 10.437508209318183, | |
| "learning_rate": 7.06896551724138e-05, | |
| "loss": 4.2638, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.26625386996904027, | |
| "grad_norm": 9.474156238032947, | |
| "learning_rate": 7.241379310344828e-05, | |
| "loss": 3.9874, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.2724458204334365, | |
| "grad_norm": 7.638324788974118, | |
| "learning_rate": 7.413793103448277e-05, | |
| "loss": 3.8656, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.2786377708978328, | |
| "grad_norm": 8.32912958344573, | |
| "learning_rate": 7.586206896551724e-05, | |
| "loss": 3.812, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.2848297213622291, | |
| "grad_norm": 9.374949463316568, | |
| "learning_rate": 7.758620689655173e-05, | |
| "loss": 3.7579, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.29102167182662536, | |
| "grad_norm": 11.53047562375587, | |
| "learning_rate": 7.931034482758621e-05, | |
| "loss": 3.8599, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.29721362229102166, | |
| "grad_norm": 7.0175519843629, | |
| "learning_rate": 8.103448275862069e-05, | |
| "loss": 3.6109, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.30340557275541796, | |
| "grad_norm": 6.559697059667157, | |
| "learning_rate": 8.275862068965517e-05, | |
| "loss": 3.6014, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.30959752321981426, | |
| "grad_norm": 6.717777404862978, | |
| "learning_rate": 8.448275862068966e-05, | |
| "loss": 3.7019, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.3157894736842105, | |
| "grad_norm": 5.982207393646023, | |
| "learning_rate": 8.620689655172413e-05, | |
| "loss": 3.4398, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.3219814241486068, | |
| "grad_norm": 5.945506225475662, | |
| "learning_rate": 8.793103448275862e-05, | |
| "loss": 3.4582, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.3281733746130031, | |
| "grad_norm": 5.704596615160976, | |
| "learning_rate": 8.96551724137931e-05, | |
| "loss": 3.6077, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.33436532507739936, | |
| "grad_norm": 5.973311803401044, | |
| "learning_rate": 9.137931034482759e-05, | |
| "loss": 3.5219, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.34055727554179566, | |
| "grad_norm": 6.814299767123095, | |
| "learning_rate": 9.310344827586207e-05, | |
| "loss": 3.7694, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.34674922600619196, | |
| "grad_norm": 7.052917863988468, | |
| "learning_rate": 9.482758620689656e-05, | |
| "loss": 3.6753, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.35294117647058826, | |
| "grad_norm": 5.525773749757123, | |
| "learning_rate": 9.655172413793105e-05, | |
| "loss": 3.5596, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.3591331269349845, | |
| "grad_norm": 5.3240485466656295, | |
| "learning_rate": 9.827586206896552e-05, | |
| "loss": 3.3443, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.3653250773993808, | |
| "grad_norm": 5.475602381517988, | |
| "learning_rate": 0.0001, | |
| "loss": 3.5009, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.3715170278637771, | |
| "grad_norm": 5.892096476556127, | |
| "learning_rate": 9.999992974124023e-05, | |
| "loss": 3.5758, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.37770897832817335, | |
| "grad_norm": 4.838516017994781, | |
| "learning_rate": 9.999971896515837e-05, | |
| "loss": 3.3336, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.38390092879256965, | |
| "grad_norm": 5.246620602818011, | |
| "learning_rate": 9.999936767234674e-05, | |
| "loss": 3.4909, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.39009287925696595, | |
| "grad_norm": 6.760983626210936, | |
| "learning_rate": 9.999887586379264e-05, | |
| "loss": 3.6338, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.39628482972136225, | |
| "grad_norm": 6.012748270095294, | |
| "learning_rate": 9.99982435408782e-05, | |
| "loss": 3.586, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.4024767801857585, | |
| "grad_norm": 6.5822760683818515, | |
| "learning_rate": 9.999747070538049e-05, | |
| "loss": 3.8235, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.4086687306501548, | |
| "grad_norm": 4.95793646000932, | |
| "learning_rate": 9.999655735947144e-05, | |
| "loss": 3.5195, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.4148606811145511, | |
| "grad_norm": 5.096298290945092, | |
| "learning_rate": 9.999550350571785e-05, | |
| "loss": 3.5608, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.42105263157894735, | |
| "grad_norm": 5.023897068332846, | |
| "learning_rate": 9.999430914708143e-05, | |
| "loss": 3.4636, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.42724458204334365, | |
| "grad_norm": 5.0162625600997695, | |
| "learning_rate": 9.999297428691878e-05, | |
| "loss": 3.4252, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.43343653250773995, | |
| "grad_norm": 5.347697718620594, | |
| "learning_rate": 9.999149892898127e-05, | |
| "loss": 3.4712, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.43962848297213625, | |
| "grad_norm": 5.277743291050418, | |
| "learning_rate": 9.998988307741521e-05, | |
| "loss": 3.3179, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.4458204334365325, | |
| "grad_norm": 5.092008006469729, | |
| "learning_rate": 9.99881267367617e-05, | |
| "loss": 3.2156, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.4520123839009288, | |
| "grad_norm": 5.602962681853112, | |
| "learning_rate": 9.998622991195668e-05, | |
| "loss": 3.6088, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.4582043343653251, | |
| "grad_norm": 4.618945934546075, | |
| "learning_rate": 9.998419260833087e-05, | |
| "loss": 3.401, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.46439628482972134, | |
| "grad_norm": 4.407828092310794, | |
| "learning_rate": 9.998201483160981e-05, | |
| "loss": 3.3034, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.47058823529411764, | |
| "grad_norm": 5.060837334792514, | |
| "learning_rate": 9.997969658791384e-05, | |
| "loss": 3.4433, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.47678018575851394, | |
| "grad_norm": 4.8844393650400475, | |
| "learning_rate": 9.997723788375803e-05, | |
| "loss": 3.4946, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.48297213622291024, | |
| "grad_norm": 4.739079596692814, | |
| "learning_rate": 9.997463872605217e-05, | |
| "loss": 3.5946, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.4891640866873065, | |
| "grad_norm": 4.180305626117479, | |
| "learning_rate": 9.997189912210085e-05, | |
| "loss": 3.3336, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.4953560371517028, | |
| "grad_norm": 4.4496272009926665, | |
| "learning_rate": 9.996901907960329e-05, | |
| "loss": 3.3069, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.5015479876160991, | |
| "grad_norm": 4.839247704934278, | |
| "learning_rate": 9.99659986066534e-05, | |
| "loss": 3.456, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.5077399380804953, | |
| "grad_norm": 4.096642605457276, | |
| "learning_rate": 9.996283771173982e-05, | |
| "loss": 3.3263, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.5139318885448917, | |
| "grad_norm": 5.436181697917328, | |
| "learning_rate": 9.995953640374573e-05, | |
| "loss": 3.4265, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.5201238390092879, | |
| "grad_norm": 4.030576988739356, | |
| "learning_rate": 9.995609469194897e-05, | |
| "loss": 3.2658, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.5263157894736842, | |
| "grad_norm": 4.030663123263123, | |
| "learning_rate": 9.995251258602199e-05, | |
| "loss": 3.2758, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.5325077399380805, | |
| "grad_norm": 4.482470637813161, | |
| "learning_rate": 9.99487900960317e-05, | |
| "loss": 3.2483, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.5386996904024768, | |
| "grad_norm": 4.652325115807756, | |
| "learning_rate": 9.994492723243965e-05, | |
| "loss": 3.3789, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.544891640866873, | |
| "grad_norm": 4.9472490461802145, | |
| "learning_rate": 9.994092400610182e-05, | |
| "loss": 3.4134, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.5510835913312694, | |
| "grad_norm": 5.586407428655971, | |
| "learning_rate": 9.993678042826868e-05, | |
| "loss": 3.4805, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.5572755417956656, | |
| "grad_norm": 4.060091954707022, | |
| "learning_rate": 9.993249651058513e-05, | |
| "loss": 3.2706, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.5634674922600619, | |
| "grad_norm": 4.208423705318374, | |
| "learning_rate": 9.992807226509051e-05, | |
| "loss": 3.2509, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.5696594427244582, | |
| "grad_norm": 4.259213911444956, | |
| "learning_rate": 9.992350770421848e-05, | |
| "loss": 3.3747, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.5758513931888545, | |
| "grad_norm": 4.0529186511539885, | |
| "learning_rate": 9.991880284079704e-05, | |
| "loss": 3.3591, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.5820433436532507, | |
| "grad_norm": 4.989718273873512, | |
| "learning_rate": 9.991395768804852e-05, | |
| "loss": 3.2976, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.5882352941176471, | |
| "grad_norm": 3.7531790113696353, | |
| "learning_rate": 9.990897225958951e-05, | |
| "loss": 3.0671, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.5944272445820433, | |
| "grad_norm": 4.578086243141991, | |
| "learning_rate": 9.990384656943079e-05, | |
| "loss": 3.3136, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.6006191950464397, | |
| "grad_norm": 4.239806465153399, | |
| "learning_rate": 9.989858063197735e-05, | |
| "loss": 3.1344, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.6068111455108359, | |
| "grad_norm": 5.296536247781439, | |
| "learning_rate": 9.989317446202832e-05, | |
| "loss": 3.3383, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.6130030959752322, | |
| "grad_norm": 4.833348660562072, | |
| "learning_rate": 9.988762807477693e-05, | |
| "loss": 3.244, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.6191950464396285, | |
| "grad_norm": 4.148795233753714, | |
| "learning_rate": 9.988194148581048e-05, | |
| "loss": 3.2728, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.6253869969040248, | |
| "grad_norm": 4.269419850788365, | |
| "learning_rate": 9.987611471111027e-05, | |
| "loss": 3.1576, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.631578947368421, | |
| "grad_norm": 4.494909466750201, | |
| "learning_rate": 9.98701477670516e-05, | |
| "loss": 3.3776, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.6377708978328174, | |
| "grad_norm": 4.12371607549373, | |
| "learning_rate": 9.986404067040363e-05, | |
| "loss": 3.2136, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.6439628482972136, | |
| "grad_norm": 4.1066986326075545, | |
| "learning_rate": 9.985779343832947e-05, | |
| "loss": 3.342, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.6501547987616099, | |
| "grad_norm": 3.411792974866172, | |
| "learning_rate": 9.985140608838604e-05, | |
| "loss": 3.0947, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.6563467492260062, | |
| "grad_norm": 3.784426944839016, | |
| "learning_rate": 9.984487863852401e-05, | |
| "loss": 3.0899, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.6625386996904025, | |
| "grad_norm": 4.337957539267078, | |
| "learning_rate": 9.98382111070878e-05, | |
| "loss": 3.1851, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.6687306501547987, | |
| "grad_norm": 4.6165406721174955, | |
| "learning_rate": 9.983140351281554e-05, | |
| "loss": 3.2591, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.6749226006191951, | |
| "grad_norm": 3.895886556884958, | |
| "learning_rate": 9.982445587483892e-05, | |
| "loss": 3.074, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.6811145510835913, | |
| "grad_norm": 5.790495625569918, | |
| "learning_rate": 9.981736821268325e-05, | |
| "loss": 3.4478, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.6873065015479877, | |
| "grad_norm": 4.040001891109945, | |
| "learning_rate": 9.981014054626735e-05, | |
| "loss": 3.1273, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.6934984520123839, | |
| "grad_norm": 4.9533963229924725, | |
| "learning_rate": 9.980277289590349e-05, | |
| "loss": 3.2547, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.6996904024767802, | |
| "grad_norm": 3.5329595802639893, | |
| "learning_rate": 9.979526528229737e-05, | |
| "loss": 3.1815, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.7058823529411765, | |
| "grad_norm": 4.953624964196446, | |
| "learning_rate": 9.978761772654797e-05, | |
| "loss": 3.3241, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.7120743034055728, | |
| "grad_norm": 3.538087255596596, | |
| "learning_rate": 9.977983025014764e-05, | |
| "loss": 3.1501, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.718266253869969, | |
| "grad_norm": 4.243934280107101, | |
| "learning_rate": 9.977190287498191e-05, | |
| "loss": 3.1266, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.7244582043343654, | |
| "grad_norm": 4.2764939685444325, | |
| "learning_rate": 9.976383562332944e-05, | |
| "loss": 3.273, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.7306501547987616, | |
| "grad_norm": 4.97238240181323, | |
| "learning_rate": 9.975562851786211e-05, | |
| "loss": 3.2663, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.7368421052631579, | |
| "grad_norm": 3.887695247258602, | |
| "learning_rate": 9.97472815816447e-05, | |
| "loss": 3.2012, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.7430340557275542, | |
| "grad_norm": 3.9487406631272832, | |
| "learning_rate": 9.973879483813506e-05, | |
| "loss": 3.1826, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.7492260061919505, | |
| "grad_norm": 3.8144073397429756, | |
| "learning_rate": 9.973016831118389e-05, | |
| "loss": 3.2853, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.7554179566563467, | |
| "grad_norm": 4.337511589444347, | |
| "learning_rate": 9.972140202503477e-05, | |
| "loss": 3.122, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.7616099071207431, | |
| "grad_norm": 3.993475854912506, | |
| "learning_rate": 9.971249600432402e-05, | |
| "loss": 3.1977, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.7678018575851393, | |
| "grad_norm": 3.6866484704267104, | |
| "learning_rate": 9.97034502740807e-05, | |
| "loss": 3.1031, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.7739938080495357, | |
| "grad_norm": 3.709528741959154, | |
| "learning_rate": 9.969426485972645e-05, | |
| "loss": 3.0934, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.7801857585139319, | |
| "grad_norm": 3.9467902947943148, | |
| "learning_rate": 9.968493978707554e-05, | |
| "loss": 3.2937, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.7863777089783281, | |
| "grad_norm": 4.339424645551262, | |
| "learning_rate": 9.967547508233466e-05, | |
| "loss": 3.2433, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.7925696594427245, | |
| "grad_norm": 3.8363601911667766, | |
| "learning_rate": 9.966587077210297e-05, | |
| "loss": 3.1673, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.7987616099071208, | |
| "grad_norm": 3.750919154659581, | |
| "learning_rate": 9.965612688337194e-05, | |
| "loss": 3.0407, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.804953560371517, | |
| "grad_norm": 3.7822587436662016, | |
| "learning_rate": 9.96462434435253e-05, | |
| "loss": 3.1473, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.8111455108359134, | |
| "grad_norm": 4.247847819904635, | |
| "learning_rate": 9.963622048033898e-05, | |
| "loss": 3.19, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.8173374613003096, | |
| "grad_norm": 3.9735446888529964, | |
| "learning_rate": 9.962605802198104e-05, | |
| "loss": 3.1126, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.8235294117647058, | |
| "grad_norm": 3.9880407535617883, | |
| "learning_rate": 9.961575609701154e-05, | |
| "loss": 3.102, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.8297213622291022, | |
| "grad_norm": 3.644066398077279, | |
| "learning_rate": 9.960531473438248e-05, | |
| "loss": 3.1088, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.8359133126934984, | |
| "grad_norm": 3.772634387387643, | |
| "learning_rate": 9.959473396343777e-05, | |
| "loss": 3.1237, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.8421052631578947, | |
| "grad_norm": 3.620434973837521, | |
| "learning_rate": 9.958401381391307e-05, | |
| "loss": 3.1037, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.848297213622291, | |
| "grad_norm": 3.3883607787252497, | |
| "learning_rate": 9.957315431593577e-05, | |
| "loss": 3.0548, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.8544891640866873, | |
| "grad_norm": 3.525654083685345, | |
| "learning_rate": 9.956215550002485e-05, | |
| "loss": 3.0744, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.8606811145510835, | |
| "grad_norm": 3.276172822856916, | |
| "learning_rate": 9.955101739709086e-05, | |
| "loss": 3.107, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.8668730650154799, | |
| "grad_norm": 3.7505406680079796, | |
| "learning_rate": 9.953974003843572e-05, | |
| "loss": 3.0649, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.8730650154798761, | |
| "grad_norm": 3.4245906115032128, | |
| "learning_rate": 9.952832345575282e-05, | |
| "loss": 3.1105, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.8792569659442725, | |
| "grad_norm": 3.999576346323058, | |
| "learning_rate": 9.951676768112672e-05, | |
| "loss": 3.2317, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.8854489164086687, | |
| "grad_norm": 6.72936741544915, | |
| "learning_rate": 9.950507274703323e-05, | |
| "loss": 3.2017, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.891640866873065, | |
| "grad_norm": 4.253970968740358, | |
| "learning_rate": 9.949323868633916e-05, | |
| "loss": 3.1011, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.8978328173374613, | |
| "grad_norm": 6.551804513164246, | |
| "learning_rate": 9.948126553230241e-05, | |
| "loss": 3.1527, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.9040247678018576, | |
| "grad_norm": 4.6411829687147526, | |
| "learning_rate": 9.946915331857171e-05, | |
| "loss": 3.1914, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.9102167182662538, | |
| "grad_norm": 3.6707162993101052, | |
| "learning_rate": 9.945690207918666e-05, | |
| "loss": 3.0628, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.9164086687306502, | |
| "grad_norm": 4.333487880078733, | |
| "learning_rate": 9.944451184857751e-05, | |
| "loss": 3.0723, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.9226006191950464, | |
| "grad_norm": 4.643228355107828, | |
| "learning_rate": 9.943198266156516e-05, | |
| "loss": 3.2188, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.9287925696594427, | |
| "grad_norm": 3.81900951272679, | |
| "learning_rate": 9.9419314553361e-05, | |
| "loss": 3.1535, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.934984520123839, | |
| "grad_norm": 3.483225515541042, | |
| "learning_rate": 9.940650755956686e-05, | |
| "loss": 3.0807, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.9411764705882353, | |
| "grad_norm": 4.054952951582781, | |
| "learning_rate": 9.93935617161749e-05, | |
| "loss": 3.2731, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.9473684210526315, | |
| "grad_norm": 4.045010851938665, | |
| "learning_rate": 9.938047705956746e-05, | |
| "loss": 3.2919, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.9535603715170279, | |
| "grad_norm": 3.880953002409461, | |
| "learning_rate": 9.936725362651699e-05, | |
| "loss": 2.9833, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.9597523219814241, | |
| "grad_norm": 3.8222791710005684, | |
| "learning_rate": 9.935389145418599e-05, | |
| "loss": 3.1347, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.9659442724458205, | |
| "grad_norm": 3.6318649766832016, | |
| "learning_rate": 9.934039058012686e-05, | |
| "loss": 3.0258, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.9721362229102167, | |
| "grad_norm": 4.207757115469884, | |
| "learning_rate": 9.932675104228177e-05, | |
| "loss": 3.189, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.978328173374613, | |
| "grad_norm": 3.650959223126377, | |
| "learning_rate": 9.931297287898259e-05, | |
| "loss": 3.1595, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.9845201238390093, | |
| "grad_norm": 3.5893527928395668, | |
| "learning_rate": 9.929905612895081e-05, | |
| "loss": 3.1518, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.9907120743034056, | |
| "grad_norm": 4.052866492865934, | |
| "learning_rate": 9.928500083129736e-05, | |
| "loss": 2.9796, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.9969040247678018, | |
| "grad_norm": 3.4284326492586814, | |
| "learning_rate": 9.927080702552254e-05, | |
| "loss": 2.988, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 3.4284326492586814, | |
| "learning_rate": 9.925647475151596e-05, | |
| "loss": 1.6511, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 1.0061919504643964, | |
| "grad_norm": 3.6814653270432887, | |
| "learning_rate": 9.924200404955629e-05, | |
| "loss": 2.7978, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 1.0123839009287925, | |
| "grad_norm": 3.1067979115563875, | |
| "learning_rate": 9.922739496031129e-05, | |
| "loss": 2.8784, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 1.0185758513931888, | |
| "grad_norm": 3.811812480430689, | |
| "learning_rate": 9.921264752483761e-05, | |
| "loss": 3.0437, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 1.0247678018575852, | |
| "grad_norm": 3.662249859659687, | |
| "learning_rate": 9.919776178458071e-05, | |
| "loss": 3.0534, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 1.0309597523219813, | |
| "grad_norm": 3.5933991920665718, | |
| "learning_rate": 9.918273778137477e-05, | |
| "loss": 2.9006, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 1.0371517027863777, | |
| "grad_norm": 3.809378521805632, | |
| "learning_rate": 9.916757555744245e-05, | |
| "loss": 2.9678, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 1.043343653250774, | |
| "grad_norm": 3.403865267525689, | |
| "learning_rate": 9.915227515539496e-05, | |
| "loss": 2.9319, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 1.0495356037151702, | |
| "grad_norm": 3.745099532944618, | |
| "learning_rate": 9.913683661823176e-05, | |
| "loss": 3.0997, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.0557275541795665, | |
| "grad_norm": 3.545813633080689, | |
| "learning_rate": 9.912125998934055e-05, | |
| "loss": 2.9005, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 1.061919504643963, | |
| "grad_norm": 3.44006824427855, | |
| "learning_rate": 9.910554531249714e-05, | |
| "loss": 3.0533, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 1.068111455108359, | |
| "grad_norm": 4.183934094926584, | |
| "learning_rate": 9.908969263186525e-05, | |
| "loss": 3.0094, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 1.0743034055727554, | |
| "grad_norm": 3.988230695923216, | |
| "learning_rate": 9.907370199199648e-05, | |
| "loss": 2.9598, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 1.0804953560371517, | |
| "grad_norm": 4.058788474570464, | |
| "learning_rate": 9.905757343783014e-05, | |
| "loss": 2.9081, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 1.086687306501548, | |
| "grad_norm": 3.721071325513958, | |
| "learning_rate": 9.904130701469309e-05, | |
| "loss": 3.0445, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 1.0928792569659442, | |
| "grad_norm": 3.507314113135694, | |
| "learning_rate": 9.902490276829971e-05, | |
| "loss": 2.9417, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 1.0990712074303406, | |
| "grad_norm": 3.659862506679075, | |
| "learning_rate": 9.900836074475165e-05, | |
| "loss": 3.0042, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 1.1052631578947367, | |
| "grad_norm": 4.343669608302679, | |
| "learning_rate": 9.899168099053783e-05, | |
| "loss": 2.9413, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 1.111455108359133, | |
| "grad_norm": 3.3781335101832872, | |
| "learning_rate": 9.897486355253419e-05, | |
| "loss": 2.9604, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.1176470588235294, | |
| "grad_norm": 3.469752850980135, | |
| "learning_rate": 9.895790847800361e-05, | |
| "loss": 2.9337, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 1.1238390092879258, | |
| "grad_norm": 3.2733115281041543, | |
| "learning_rate": 9.894081581459578e-05, | |
| "loss": 2.9247, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 1.130030959752322, | |
| "grad_norm": 3.2291083717016797, | |
| "learning_rate": 9.892358561034711e-05, | |
| "loss": 2.9189, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 1.1362229102167183, | |
| "grad_norm": 3.176371624508396, | |
| "learning_rate": 9.890621791368049e-05, | |
| "loss": 2.9176, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 1.1424148606811146, | |
| "grad_norm": 3.1779099737432275, | |
| "learning_rate": 9.888871277340522e-05, | |
| "loss": 2.9177, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 1.1486068111455108, | |
| "grad_norm": 3.4931730579286517, | |
| "learning_rate": 9.887107023871691e-05, | |
| "loss": 2.8821, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 1.1547987616099071, | |
| "grad_norm": 3.640445043121747, | |
| "learning_rate": 9.885329035919724e-05, | |
| "loss": 2.9101, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 1.1609907120743035, | |
| "grad_norm": 3.3296285577441322, | |
| "learning_rate": 9.88353731848139e-05, | |
| "loss": 2.8937, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 1.1671826625386996, | |
| "grad_norm": 3.6268853078486494, | |
| "learning_rate": 9.881731876592045e-05, | |
| "loss": 2.9918, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 1.173374613003096, | |
| "grad_norm": 3.8683456506715803, | |
| "learning_rate": 9.879912715325612e-05, | |
| "loss": 2.8969, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.1795665634674923, | |
| "grad_norm": 3.465926903871117, | |
| "learning_rate": 9.878079839794571e-05, | |
| "loss": 2.9277, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 1.1857585139318885, | |
| "grad_norm": 3.4135357034349245, | |
| "learning_rate": 9.876233255149945e-05, | |
| "loss": 2.855, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 1.1919504643962848, | |
| "grad_norm": 3.4138858890956487, | |
| "learning_rate": 9.874372966581285e-05, | |
| "loss": 2.8223, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 1.1981424148606812, | |
| "grad_norm": 3.1946174932663407, | |
| "learning_rate": 9.87249897931665e-05, | |
| "loss": 2.8626, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 1.2043343653250773, | |
| "grad_norm": 3.395109193915768, | |
| "learning_rate": 9.870611298622605e-05, | |
| "loss": 2.9117, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 1.2105263157894737, | |
| "grad_norm": 4.160726453183203, | |
| "learning_rate": 9.868709929804193e-05, | |
| "loss": 3.0261, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 1.21671826625387, | |
| "grad_norm": 3.7222325769945863, | |
| "learning_rate": 9.866794878204926e-05, | |
| "loss": 2.9805, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 1.2229102167182662, | |
| "grad_norm": 3.9212738228776742, | |
| "learning_rate": 9.86486614920677e-05, | |
| "loss": 2.8858, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 1.2291021671826625, | |
| "grad_norm": 3.6580281529557195, | |
| "learning_rate": 9.862923748230129e-05, | |
| "loss": 2.9386, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 1.2352941176470589, | |
| "grad_norm": 3.4226197107570133, | |
| "learning_rate": 9.860967680733831e-05, | |
| "loss": 2.9348, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.2414860681114552, | |
| "grad_norm": 4.0166432759377075, | |
| "learning_rate": 9.858997952215112e-05, | |
| "loss": 2.8854, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 1.2476780185758514, | |
| "grad_norm": 3.8146296663629933, | |
| "learning_rate": 9.857014568209597e-05, | |
| "loss": 2.971, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 1.2538699690402477, | |
| "grad_norm": 3.606205528393454, | |
| "learning_rate": 9.855017534291292e-05, | |
| "loss": 2.8111, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 1.2600619195046439, | |
| "grad_norm": 3.041777523006725, | |
| "learning_rate": 9.853006856072561e-05, | |
| "loss": 2.8318, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 1.2662538699690402, | |
| "grad_norm": 3.0864585116855596, | |
| "learning_rate": 9.850982539204115e-05, | |
| "loss": 2.8505, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.2724458204334366, | |
| "grad_norm": 3.321546800155687, | |
| "learning_rate": 9.848944589374993e-05, | |
| "loss": 2.94, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 1.278637770897833, | |
| "grad_norm": 3.5740731241406007, | |
| "learning_rate": 9.846893012312549e-05, | |
| "loss": 2.9864, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 1.284829721362229, | |
| "grad_norm": 3.8848590600257116, | |
| "learning_rate": 9.844827813782431e-05, | |
| "loss": 3.0241, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 1.2910216718266254, | |
| "grad_norm": 3.3954938267589907, | |
| "learning_rate": 9.842748999588573e-05, | |
| "loss": 2.8355, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 1.2972136222910216, | |
| "grad_norm": 4.997061770959796, | |
| "learning_rate": 9.840656575573172e-05, | |
| "loss": 2.938, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.303405572755418, | |
| "grad_norm": 3.0491464862772504, | |
| "learning_rate": 9.83855054761667e-05, | |
| "loss": 2.9129, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 1.3095975232198143, | |
| "grad_norm": 3.738292669236681, | |
| "learning_rate": 9.836430921637745e-05, | |
| "loss": 3.1255, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 1.3157894736842106, | |
| "grad_norm": 3.1688052745498445, | |
| "learning_rate": 9.834297703593289e-05, | |
| "loss": 2.9097, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 1.3219814241486068, | |
| "grad_norm": 3.285545746034363, | |
| "learning_rate": 9.832150899478391e-05, | |
| "loss": 2.9212, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 1.328173374613003, | |
| "grad_norm": 3.885090577145353, | |
| "learning_rate": 9.829990515326324e-05, | |
| "loss": 3.0205, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.3343653250773992, | |
| "grad_norm": 3.9975283980429697, | |
| "learning_rate": 9.827816557208524e-05, | |
| "loss": 2.8925, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 1.3405572755417956, | |
| "grad_norm": 3.0485953751430026, | |
| "learning_rate": 9.825629031234574e-05, | |
| "loss": 2.9117, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 1.346749226006192, | |
| "grad_norm": 3.315430153804413, | |
| "learning_rate": 9.823427943552189e-05, | |
| "loss": 2.8829, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 1.3529411764705883, | |
| "grad_norm": 3.5651809329580604, | |
| "learning_rate": 9.821213300347198e-05, | |
| "loss": 3.0057, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 1.3591331269349844, | |
| "grad_norm": 3.827599928349406, | |
| "learning_rate": 9.818985107843523e-05, | |
| "loss": 2.966, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.3653250773993808, | |
| "grad_norm": 3.7317415108791643, | |
| "learning_rate": 9.816743372303165e-05, | |
| "loss": 2.84, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 1.3715170278637772, | |
| "grad_norm": 3.205316348701027, | |
| "learning_rate": 9.81448810002619e-05, | |
| "loss": 3.0217, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 1.3777089783281733, | |
| "grad_norm": 3.3987871140372836, | |
| "learning_rate": 9.812219297350697e-05, | |
| "loss": 2.935, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 1.3839009287925697, | |
| "grad_norm": 3.3696442144844214, | |
| "learning_rate": 9.809936970652823e-05, | |
| "loss": 2.904, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 1.390092879256966, | |
| "grad_norm": 3.764260097136488, | |
| "learning_rate": 9.807641126346702e-05, | |
| "loss": 2.874, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.3962848297213624, | |
| "grad_norm": 3.367367046433176, | |
| "learning_rate": 9.805331770884463e-05, | |
| "loss": 2.8504, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 1.4024767801857585, | |
| "grad_norm": 3.107765697644355, | |
| "learning_rate": 9.803008910756202e-05, | |
| "loss": 2.9753, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 1.4086687306501549, | |
| "grad_norm": 3.2508192746442464, | |
| "learning_rate": 9.800672552489972e-05, | |
| "loss": 2.8143, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 1.414860681114551, | |
| "grad_norm": 3.792239883033678, | |
| "learning_rate": 9.798322702651754e-05, | |
| "loss": 2.978, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 1.4210526315789473, | |
| "grad_norm": 2.959609552632562, | |
| "learning_rate": 9.795959367845456e-05, | |
| "loss": 2.8268, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.4272445820433437, | |
| "grad_norm": 3.284598910008138, | |
| "learning_rate": 9.793582554712872e-05, | |
| "loss": 2.8506, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 1.43343653250774, | |
| "grad_norm": 3.3502742038031217, | |
| "learning_rate": 9.79119226993368e-05, | |
| "loss": 2.9007, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.4396284829721362, | |
| "grad_norm": 3.361315897122181, | |
| "learning_rate": 9.788788520225421e-05, | |
| "loss": 2.8859, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 1.4458204334365325, | |
| "grad_norm": 3.314532975859471, | |
| "learning_rate": 9.786371312343471e-05, | |
| "loss": 2.8042, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.4520123839009287, | |
| "grad_norm": 3.191152838747858, | |
| "learning_rate": 9.783940653081031e-05, | |
| "loss": 2.8412, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.458204334365325, | |
| "grad_norm": 2.991136323469225, | |
| "learning_rate": 9.781496549269108e-05, | |
| "loss": 2.8797, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.4643962848297214, | |
| "grad_norm": 3.038355309822365, | |
| "learning_rate": 9.779039007776487e-05, | |
| "loss": 2.8915, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 1.4705882352941178, | |
| "grad_norm": 2.909235326921926, | |
| "learning_rate": 9.776568035509723e-05, | |
| "loss": 2.9127, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.4767801857585139, | |
| "grad_norm": 2.91895773779436, | |
| "learning_rate": 9.774083639413111e-05, | |
| "loss": 2.799, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 1.4829721362229102, | |
| "grad_norm": 3.492761784355138, | |
| "learning_rate": 9.771585826468679e-05, | |
| "loss": 3.0023, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.4891640866873064, | |
| "grad_norm": 4.063292684306405, | |
| "learning_rate": 9.769074603696153e-05, | |
| "loss": 2.9405, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 1.4953560371517027, | |
| "grad_norm": 3.4157481247973918, | |
| "learning_rate": 9.76654997815295e-05, | |
| "loss": 2.8109, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.501547987616099, | |
| "grad_norm": 3.2243028297166583, | |
| "learning_rate": 9.764011956934151e-05, | |
| "loss": 2.9908, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 1.5077399380804954, | |
| "grad_norm": 3.84874713440287, | |
| "learning_rate": 9.761460547172487e-05, | |
| "loss": 2.7957, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.5139318885448918, | |
| "grad_norm": 3.2207069415479057, | |
| "learning_rate": 9.758895756038313e-05, | |
| "loss": 2.9172, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.520123839009288, | |
| "grad_norm": 3.6267787360150447, | |
| "learning_rate": 9.756317590739591e-05, | |
| "loss": 2.9436, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.526315789473684, | |
| "grad_norm": 3.257441826018387, | |
| "learning_rate": 9.753726058521867e-05, | |
| "loss": 2.9519, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 1.5325077399380804, | |
| "grad_norm": 3.333004086171816, | |
| "learning_rate": 9.751121166668256e-05, | |
| "loss": 2.9425, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.5386996904024768, | |
| "grad_norm": 3.4442648583827706, | |
| "learning_rate": 9.748502922499418e-05, | |
| "loss": 2.9805, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 1.5448916408668731, | |
| "grad_norm": 2.885763067404801, | |
| "learning_rate": 9.745871333373533e-05, | |
| "loss": 2.8287, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.5510835913312695, | |
| "grad_norm": 2.828880724852376, | |
| "learning_rate": 9.743226406686292e-05, | |
| "loss": 2.7098, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 1.5572755417956656, | |
| "grad_norm": 2.932748551447019, | |
| "learning_rate": 9.740568149870864e-05, | |
| "loss": 2.905, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.5634674922600618, | |
| "grad_norm": 3.663837129950405, | |
| "learning_rate": 9.737896570397884e-05, | |
| "loss": 2.8037, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 1.5696594427244581, | |
| "grad_norm": 3.2587738369006787, | |
| "learning_rate": 9.735211675775423e-05, | |
| "loss": 2.7836, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.5758513931888545, | |
| "grad_norm": 3.6383097347732267, | |
| "learning_rate": 9.732513473548978e-05, | |
| "loss": 2.823, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.5820433436532508, | |
| "grad_norm": 3.1219260643503786, | |
| "learning_rate": 9.729801971301443e-05, | |
| "loss": 2.8655, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.5882352941176472, | |
| "grad_norm": 3.194832439749176, | |
| "learning_rate": 9.727077176653089e-05, | |
| "loss": 2.8342, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 1.5944272445820433, | |
| "grad_norm": 3.278437494188074, | |
| "learning_rate": 9.724339097261544e-05, | |
| "loss": 2.8681, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.6006191950464397, | |
| "grad_norm": 3.631741144245325, | |
| "learning_rate": 9.721587740821767e-05, | |
| "loss": 2.8873, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 1.6068111455108358, | |
| "grad_norm": 3.6607227863330754, | |
| "learning_rate": 9.718823115066039e-05, | |
| "loss": 2.8828, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.6130030959752322, | |
| "grad_norm": 3.1235965092567777, | |
| "learning_rate": 9.716045227763923e-05, | |
| "loss": 2.7903, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 1.6191950464396285, | |
| "grad_norm": 3.5015393310113843, | |
| "learning_rate": 9.713254086722258e-05, | |
| "loss": 2.9677, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.6253869969040249, | |
| "grad_norm": 3.45142761429685, | |
| "learning_rate": 9.710449699785129e-05, | |
| "loss": 2.8914, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 1.631578947368421, | |
| "grad_norm": 3.4244592017420143, | |
| "learning_rate": 9.707632074833843e-05, | |
| "loss": 2.9314, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.6377708978328174, | |
| "grad_norm": 3.413531385719404, | |
| "learning_rate": 9.704801219786915e-05, | |
| "loss": 2.934, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.6439628482972135, | |
| "grad_norm": 3.09548146061664, | |
| "learning_rate": 9.70195714260004e-05, | |
| "loss": 2.8336, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.6501547987616099, | |
| "grad_norm": 3.0823694542298994, | |
| "learning_rate": 9.69909985126607e-05, | |
| "loss": 2.8104, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.6563467492260062, | |
| "grad_norm": 4.038090716591855, | |
| "learning_rate": 9.696229353814996e-05, | |
| "loss": 2.9018, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.6625386996904026, | |
| "grad_norm": 4.07074396660653, | |
| "learning_rate": 9.693345658313923e-05, | |
| "loss": 2.9215, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.6687306501547987, | |
| "grad_norm": 2.945420498745823, | |
| "learning_rate": 9.690448772867042e-05, | |
| "loss": 2.9206, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.674922600619195, | |
| "grad_norm": 2.9798751049881256, | |
| "learning_rate": 9.687538705615619e-05, | |
| "loss": 2.9972, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 1.6811145510835912, | |
| "grad_norm": 3.224093315814822, | |
| "learning_rate": 9.684615464737963e-05, | |
| "loss": 2.8949, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.6873065015479876, | |
| "grad_norm": 3.2093046728721455, | |
| "learning_rate": 9.681679058449402e-05, | |
| "loss": 2.8111, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.693498452012384, | |
| "grad_norm": 3.298118927005518, | |
| "learning_rate": 9.67872949500227e-05, | |
| "loss": 2.8876, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.6996904024767803, | |
| "grad_norm": 3.4137199531552667, | |
| "learning_rate": 9.675766782685874e-05, | |
| "loss": 2.7961, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.7058823529411766, | |
| "grad_norm": 2.872759585021391, | |
| "learning_rate": 9.672790929826469e-05, | |
| "loss": 2.8342, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.7120743034055728, | |
| "grad_norm": 2.8673153506122646, | |
| "learning_rate": 9.669801944787249e-05, | |
| "loss": 2.7848, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 1.718266253869969, | |
| "grad_norm": 3.0243844826232795, | |
| "learning_rate": 9.666799835968308e-05, | |
| "loss": 2.8087, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.7244582043343653, | |
| "grad_norm": 3.278008218842454, | |
| "learning_rate": 9.663784611806624e-05, | |
| "loss": 2.8108, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 1.7306501547987616, | |
| "grad_norm": 3.3406097276972475, | |
| "learning_rate": 9.660756280776031e-05, | |
| "loss": 2.8563, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.736842105263158, | |
| "grad_norm": 3.139644391198559, | |
| "learning_rate": 9.657714851387202e-05, | |
| "loss": 2.8115, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 1.7430340557275543, | |
| "grad_norm": 3.040332202758651, | |
| "learning_rate": 9.65466033218762e-05, | |
| "loss": 2.9067, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.7492260061919505, | |
| "grad_norm": 3.1365447433215956, | |
| "learning_rate": 9.651592731761554e-05, | |
| "loss": 2.8302, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 1.7554179566563466, | |
| "grad_norm": 2.83437264967015, | |
| "learning_rate": 9.648512058730035e-05, | |
| "loss": 2.8447, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.761609907120743, | |
| "grad_norm": 3.3452921626485446, | |
| "learning_rate": 9.645418321750834e-05, | |
| "loss": 2.7867, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.7678018575851393, | |
| "grad_norm": 3.0390803645621025, | |
| "learning_rate": 9.642311529518438e-05, | |
| "loss": 2.8429, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.7739938080495357, | |
| "grad_norm": 2.97311416389538, | |
| "learning_rate": 9.639191690764018e-05, | |
| "loss": 2.9274, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 1.780185758513932, | |
| "grad_norm": 3.3736768005646787, | |
| "learning_rate": 9.636058814255418e-05, | |
| "loss": 2.9686, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.7863777089783281, | |
| "grad_norm": 4.0454229812273965, | |
| "learning_rate": 9.632912908797115e-05, | |
| "loss": 2.6697, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 1.7925696594427245, | |
| "grad_norm": 2.932905575784552, | |
| "learning_rate": 9.629753983230207e-05, | |
| "loss": 2.8265, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.7987616099071206, | |
| "grad_norm": 3.4815911353329274, | |
| "learning_rate": 9.626582046432384e-05, | |
| "loss": 2.8889, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 1.804953560371517, | |
| "grad_norm": 3.304517485229584, | |
| "learning_rate": 9.623397107317898e-05, | |
| "loss": 2.8299, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.8111455108359134, | |
| "grad_norm": 3.4688423925072573, | |
| "learning_rate": 9.620199174837541e-05, | |
| "loss": 2.8008, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 1.8173374613003097, | |
| "grad_norm": 3.46294088440551, | |
| "learning_rate": 9.616988257978628e-05, | |
| "loss": 2.8924, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.8235294117647058, | |
| "grad_norm": 3.5088075010477504, | |
| "learning_rate": 9.61376436576496e-05, | |
| "loss": 2.8466, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.8297213622291022, | |
| "grad_norm": 3.5637222019380825, | |
| "learning_rate": 9.610527507256802e-05, | |
| "loss": 2.8358, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.8359133126934983, | |
| "grad_norm": 3.1688045200480155, | |
| "learning_rate": 9.607277691550862e-05, | |
| "loss": 2.8324, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 1.8421052631578947, | |
| "grad_norm": 3.0702348395736236, | |
| "learning_rate": 9.60401492778026e-05, | |
| "loss": 2.9719, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.848297213622291, | |
| "grad_norm": 3.588593649177979, | |
| "learning_rate": 9.600739225114506e-05, | |
| "loss": 2.826, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.8544891640866874, | |
| "grad_norm": 3.3305202346452294, | |
| "learning_rate": 9.59745059275947e-05, | |
| "loss": 3.0333, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.8606811145510835, | |
| "grad_norm": 3.1335924494846665, | |
| "learning_rate": 9.594149039957365e-05, | |
| "loss": 2.8217, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.86687306501548, | |
| "grad_norm": 2.596605310119535, | |
| "learning_rate": 9.590834575986708e-05, | |
| "loss": 2.8302, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.873065015479876, | |
| "grad_norm": 2.842224636487708, | |
| "learning_rate": 9.587507210162307e-05, | |
| "loss": 2.77, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.8792569659442724, | |
| "grad_norm": 3.375074913274891, | |
| "learning_rate": 9.584166951835222e-05, | |
| "loss": 2.914, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.8854489164086687, | |
| "grad_norm": 3.086672402067182, | |
| "learning_rate": 9.580813810392755e-05, | |
| "loss": 2.8741, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.891640866873065, | |
| "grad_norm": 3.412761519550504, | |
| "learning_rate": 9.577447795258403e-05, | |
| "loss": 2.713, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.8978328173374615, | |
| "grad_norm": 2.825161059955203, | |
| "learning_rate": 9.574068915891849e-05, | |
| "loss": 2.7335, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.9040247678018576, | |
| "grad_norm": 2.9720239351377775, | |
| "learning_rate": 9.57067718178893e-05, | |
| "loss": 2.7431, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.9102167182662537, | |
| "grad_norm": 3.2093709733529856, | |
| "learning_rate": 9.567272602481606e-05, | |
| "loss": 2.9308, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.91640866873065, | |
| "grad_norm": 3.048048026454268, | |
| "learning_rate": 9.563855187537937e-05, | |
| "loss": 2.8424, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.9226006191950464, | |
| "grad_norm": 3.0542567194175567, | |
| "learning_rate": 9.56042494656206e-05, | |
| "loss": 2.8537, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 1.9287925696594428, | |
| "grad_norm": 3.349906695410103, | |
| "learning_rate": 9.556981889194149e-05, | |
| "loss": 2.7549, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.9349845201238391, | |
| "grad_norm": 3.0260829521434807, | |
| "learning_rate": 9.553526025110405e-05, | |
| "loss": 2.962, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 1.9411764705882353, | |
| "grad_norm": 3.1632389516665778, | |
| "learning_rate": 9.550057364023016e-05, | |
| "loss": 2.8224, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.9473684210526314, | |
| "grad_norm": 3.1179495852636516, | |
| "learning_rate": 9.546575915680134e-05, | |
| "loss": 2.8394, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.9535603715170278, | |
| "grad_norm": 3.242102168782828, | |
| "learning_rate": 9.54308168986585e-05, | |
| "loss": 2.8167, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.9597523219814241, | |
| "grad_norm": 3.53462667333172, | |
| "learning_rate": 9.539574696400164e-05, | |
| "loss": 2.7766, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 1.9659442724458205, | |
| "grad_norm": 3.987768918752747, | |
| "learning_rate": 9.536054945138953e-05, | |
| "loss": 2.6656, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.9721362229102168, | |
| "grad_norm": 2.8942848523535947, | |
| "learning_rate": 9.532522445973955e-05, | |
| "loss": 2.798, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 1.978328173374613, | |
| "grad_norm": 2.938369919319297, | |
| "learning_rate": 9.528977208832729e-05, | |
| "loss": 2.8506, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.9845201238390093, | |
| "grad_norm": 3.286930320946629, | |
| "learning_rate": 9.525419243678632e-05, | |
| "loss": 2.9692, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 1.9907120743034055, | |
| "grad_norm": 2.900305427286331, | |
| "learning_rate": 9.521848560510796e-05, | |
| "loss": 2.8208, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.9969040247678018, | |
| "grad_norm": 3.183121159883855, | |
| "learning_rate": 9.518265169364089e-05, | |
| "loss": 2.761, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 2.768600331874623, | |
| "learning_rate": 9.514669080309096e-05, | |
| "loss": 1.4118, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 2.0061919504643964, | |
| "grad_norm": 2.7548583500669976, | |
| "learning_rate": 9.51106030345209e-05, | |
| "loss": 2.6787, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 2.0123839009287927, | |
| "grad_norm": 3.4443221488903926, | |
| "learning_rate": 9.507438848934995e-05, | |
| "loss": 2.6948, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 2.018575851393189, | |
| "grad_norm": 2.8726596638954938, | |
| "learning_rate": 9.503804726935368e-05, | |
| "loss": 2.6093, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 2.024767801857585, | |
| "grad_norm": 3.389044741091986, | |
| "learning_rate": 9.500157947666367e-05, | |
| "loss": 2.5705, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 2.0309597523219813, | |
| "grad_norm": 3.0580993903626306, | |
| "learning_rate": 9.496498521376717e-05, | |
| "loss": 2.5358, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 2.0371517027863777, | |
| "grad_norm": 3.468931786923507, | |
| "learning_rate": 9.492826458350691e-05, | |
| "loss": 2.5419, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.043343653250774, | |
| "grad_norm": 3.3444948196299427, | |
| "learning_rate": 9.489141768908071e-05, | |
| "loss": 2.7272, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 2.0495356037151704, | |
| "grad_norm": 3.1880613030560454, | |
| "learning_rate": 9.485444463404125e-05, | |
| "loss": 2.6179, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 2.0557275541795668, | |
| "grad_norm": 3.987073827921253, | |
| "learning_rate": 9.481734552229578e-05, | |
| "loss": 2.6154, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 2.0619195046439627, | |
| "grad_norm": 3.405296329527151, | |
| "learning_rate": 9.47801204581058e-05, | |
| "loss": 2.6811, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 2.068111455108359, | |
| "grad_norm": 3.1752299278728175, | |
| "learning_rate": 9.474276954608677e-05, | |
| "loss": 2.638, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 2.0743034055727554, | |
| "grad_norm": 3.4269857244773543, | |
| "learning_rate": 9.470529289120786e-05, | |
| "loss": 2.6532, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 2.0804953560371517, | |
| "grad_norm": 3.2676309514016495, | |
| "learning_rate": 9.466769059879159e-05, | |
| "loss": 2.6029, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 2.086687306501548, | |
| "grad_norm": 3.613838896557624, | |
| "learning_rate": 9.462996277451359e-05, | |
| "loss": 2.5344, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 2.0928792569659445, | |
| "grad_norm": 3.1931623478278928, | |
| "learning_rate": 9.459210952440225e-05, | |
| "loss": 2.5997, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 2.0990712074303404, | |
| "grad_norm": 3.5892129567202913, | |
| "learning_rate": 9.455413095483849e-05, | |
| "loss": 2.6291, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.1052631578947367, | |
| "grad_norm": 3.1425227101080955, | |
| "learning_rate": 9.451602717255537e-05, | |
| "loss": 2.5365, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 2.111455108359133, | |
| "grad_norm": 3.7512542147695407, | |
| "learning_rate": 9.447779828463788e-05, | |
| "loss": 2.7276, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 2.1176470588235294, | |
| "grad_norm": 3.8083642585550237, | |
| "learning_rate": 9.443944439852259e-05, | |
| "loss": 2.619, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 2.123839009287926, | |
| "grad_norm": 3.870821371956724, | |
| "learning_rate": 9.440096562199736e-05, | |
| "loss": 2.5995, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 2.130030959752322, | |
| "grad_norm": 3.004401568037884, | |
| "learning_rate": 9.436236206320104e-05, | |
| "loss": 2.5807, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 2.136222910216718, | |
| "grad_norm": 3.5811035323778624, | |
| "learning_rate": 9.432363383062314e-05, | |
| "loss": 2.5922, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 2.1424148606811144, | |
| "grad_norm": 3.1678609835071634, | |
| "learning_rate": 9.428478103310357e-05, | |
| "loss": 2.5559, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 2.1486068111455108, | |
| "grad_norm": 4.00863984446441, | |
| "learning_rate": 9.424580377983231e-05, | |
| "loss": 2.814, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 2.154798761609907, | |
| "grad_norm": 3.3069124426114636, | |
| "learning_rate": 9.420670218034913e-05, | |
| "loss": 2.5825, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 2.1609907120743035, | |
| "grad_norm": 3.227932310894545, | |
| "learning_rate": 9.416747634454316e-05, | |
| "loss": 2.7328, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.1671826625387, | |
| "grad_norm": 3.193488562980502, | |
| "learning_rate": 9.412812638265279e-05, | |
| "loss": 2.6004, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 2.173374613003096, | |
| "grad_norm": 3.138286190856697, | |
| "learning_rate": 9.408865240526518e-05, | |
| "loss": 2.6494, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 2.179566563467492, | |
| "grad_norm": 3.486032822685665, | |
| "learning_rate": 9.404905452331604e-05, | |
| "loss": 2.5935, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 2.1857585139318885, | |
| "grad_norm": 3.57049625554477, | |
| "learning_rate": 9.400933284808933e-05, | |
| "loss": 2.5879, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 2.191950464396285, | |
| "grad_norm": 3.0638448164434826, | |
| "learning_rate": 9.396948749121681e-05, | |
| "loss": 2.695, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 2.198142414860681, | |
| "grad_norm": 3.9956558838253056, | |
| "learning_rate": 9.392951856467795e-05, | |
| "loss": 2.6175, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 2.2043343653250775, | |
| "grad_norm": 3.8578672053862775, | |
| "learning_rate": 9.388942618079941e-05, | |
| "loss": 2.6658, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 2.2105263157894735, | |
| "grad_norm": 3.25975304104384, | |
| "learning_rate": 9.384921045225483e-05, | |
| "loss": 2.5892, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 2.21671826625387, | |
| "grad_norm": 2.5766286937173626, | |
| "learning_rate": 9.380887149206452e-05, | |
| "loss": 2.6644, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 2.222910216718266, | |
| "grad_norm": 3.047972701055752, | |
| "learning_rate": 9.376840941359508e-05, | |
| "loss": 2.5642, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 2.2291021671826625, | |
| "grad_norm": 2.9547625831600492, | |
| "learning_rate": 9.372782433055914e-05, | |
| "loss": 2.5834, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 2.235294117647059, | |
| "grad_norm": 3.2164908658189844, | |
| "learning_rate": 9.368711635701499e-05, | |
| "loss": 2.6811, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 2.2414860681114552, | |
| "grad_norm": 3.283475564049949, | |
| "learning_rate": 9.364628560736631e-05, | |
| "loss": 2.6111, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 2.2476780185758516, | |
| "grad_norm": 3.343599128743468, | |
| "learning_rate": 9.36053321963618e-05, | |
| "loss": 2.6123, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 2.2538699690402475, | |
| "grad_norm": 3.4502623336737424, | |
| "learning_rate": 9.356425623909493e-05, | |
| "loss": 2.7775, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 2.260061919504644, | |
| "grad_norm": 3.162272731188207, | |
| "learning_rate": 9.352305785100348e-05, | |
| "loss": 2.7052, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 2.26625386996904, | |
| "grad_norm": 3.3567857923317272, | |
| "learning_rate": 9.348173714786939e-05, | |
| "loss": 2.6605, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 2.2724458204334366, | |
| "grad_norm": 3.3134267019833974, | |
| "learning_rate": 9.344029424581829e-05, | |
| "loss": 2.6522, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 2.278637770897833, | |
| "grad_norm": 3.0966854245280353, | |
| "learning_rate": 9.339872926131929e-05, | |
| "loss": 2.5706, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 2.2848297213622293, | |
| "grad_norm": 2.9494878884102533, | |
| "learning_rate": 9.335704231118454e-05, | |
| "loss": 2.7847, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 2.291021671826625, | |
| "grad_norm": 3.0052075309306483, | |
| "learning_rate": 9.331523351256896e-05, | |
| "loss": 2.4242, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 2.2972136222910216, | |
| "grad_norm": 3.865779841352929, | |
| "learning_rate": 9.327330298296997e-05, | |
| "loss": 2.6202, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 2.303405572755418, | |
| "grad_norm": 3.413710786689101, | |
| "learning_rate": 9.323125084022701e-05, | |
| "loss": 2.6319, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 2.3095975232198143, | |
| "grad_norm": 3.9872687160051927, | |
| "learning_rate": 9.318907720252136e-05, | |
| "loss": 2.7452, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 2.3157894736842106, | |
| "grad_norm": 3.606954976832062, | |
| "learning_rate": 9.31467821883757e-05, | |
| "loss": 2.7013, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 2.321981424148607, | |
| "grad_norm": 3.0244580104851364, | |
| "learning_rate": 9.310436591665384e-05, | |
| "loss": 2.7322, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 2.3281733746130033, | |
| "grad_norm": 3.2847212713666702, | |
| "learning_rate": 9.306182850656038e-05, | |
| "loss": 2.6815, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 2.3343653250773992, | |
| "grad_norm": 3.253192934470869, | |
| "learning_rate": 9.301917007764034e-05, | |
| "loss": 2.7149, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 2.3405572755417956, | |
| "grad_norm": 3.384274414142735, | |
| "learning_rate": 9.297639074977885e-05, | |
| "loss": 2.6428, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 2.346749226006192, | |
| "grad_norm": 3.5615004713406666, | |
| "learning_rate": 9.293349064320083e-05, | |
| "loss": 2.6023, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.3529411764705883, | |
| "grad_norm": 3.153626899286148, | |
| "learning_rate": 9.289046987847058e-05, | |
| "loss": 2.5489, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 2.3591331269349847, | |
| "grad_norm": 4.008345134198719, | |
| "learning_rate": 9.284732857649154e-05, | |
| "loss": 2.6347, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 2.3653250773993806, | |
| "grad_norm": 3.605420355608457, | |
| "learning_rate": 9.280406685850586e-05, | |
| "loss": 2.6115, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 2.371517027863777, | |
| "grad_norm": 3.2090318428995896, | |
| "learning_rate": 9.276068484609418e-05, | |
| "loss": 2.5584, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 2.3777089783281733, | |
| "grad_norm": 3.7889425362871556, | |
| "learning_rate": 9.271718266117511e-05, | |
| "loss": 2.7685, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 2.3839009287925697, | |
| "grad_norm": 3.4617864132858056, | |
| "learning_rate": 9.267356042600505e-05, | |
| "loss": 2.6928, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 2.390092879256966, | |
| "grad_norm": 3.4918988126495414, | |
| "learning_rate": 9.262981826317777e-05, | |
| "loss": 2.5776, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 2.3962848297213624, | |
| "grad_norm": 3.2844017613022225, | |
| "learning_rate": 9.258595629562405e-05, | |
| "loss": 2.5817, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 2.4024767801857587, | |
| "grad_norm": 3.8704827773214006, | |
| "learning_rate": 9.254197464661143e-05, | |
| "loss": 2.6336, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 2.4086687306501546, | |
| "grad_norm": 3.605148120779814, | |
| "learning_rate": 9.249787343974371e-05, | |
| "loss": 2.5303, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.414860681114551, | |
| "grad_norm": 3.9953677588197496, | |
| "learning_rate": 9.245365279896075e-05, | |
| "loss": 2.5591, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 2.4210526315789473, | |
| "grad_norm": 3.2326130501278465, | |
| "learning_rate": 9.240931284853807e-05, | |
| "loss": 2.6319, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 2.4272445820433437, | |
| "grad_norm": 3.4297234946250135, | |
| "learning_rate": 9.236485371308642e-05, | |
| "loss": 2.6363, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 2.43343653250774, | |
| "grad_norm": 3.9434355381933837, | |
| "learning_rate": 9.232027551755157e-05, | |
| "loss": 2.5911, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 2.4396284829721364, | |
| "grad_norm": 3.416834504577997, | |
| "learning_rate": 9.22755783872139e-05, | |
| "loss": 2.7753, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 2.4458204334365323, | |
| "grad_norm": 2.798767439755245, | |
| "learning_rate": 9.223076244768797e-05, | |
| "loss": 2.5571, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 2.4520123839009287, | |
| "grad_norm": 3.312096781560587, | |
| "learning_rate": 9.218582782492227e-05, | |
| "loss": 2.7031, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 2.458204334365325, | |
| "grad_norm": 3.3106706550562772, | |
| "learning_rate": 9.214077464519885e-05, | |
| "loss": 2.5017, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 2.4643962848297214, | |
| "grad_norm": 2.882854108845148, | |
| "learning_rate": 9.209560303513296e-05, | |
| "loss": 2.779, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 2.4705882352941178, | |
| "grad_norm": 3.603742563496827, | |
| "learning_rate": 9.20503131216726e-05, | |
| "loss": 2.5953, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.476780185758514, | |
| "grad_norm": 3.056177277208107, | |
| "learning_rate": 9.20049050320983e-05, | |
| "loss": 2.6001, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 2.4829721362229105, | |
| "grad_norm": 3.494580488159622, | |
| "learning_rate": 9.195937889402276e-05, | |
| "loss": 2.6322, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 2.4891640866873064, | |
| "grad_norm": 3.360260834795966, | |
| "learning_rate": 9.19137348353903e-05, | |
| "loss": 2.63, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 2.4953560371517027, | |
| "grad_norm": 3.320590906602975, | |
| "learning_rate": 9.186797298447678e-05, | |
| "loss": 2.5977, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 2.501547987616099, | |
| "grad_norm": 3.4580929903970414, | |
| "learning_rate": 9.182209346988902e-05, | |
| "loss": 2.7052, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 2.5077399380804954, | |
| "grad_norm": 3.113537260619799, | |
| "learning_rate": 9.177609642056451e-05, | |
| "loss": 2.6589, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 2.513931888544892, | |
| "grad_norm": 2.7289651129651147, | |
| "learning_rate": 9.17299819657711e-05, | |
| "loss": 2.5995, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 2.5201238390092877, | |
| "grad_norm": 2.9890848188099066, | |
| "learning_rate": 9.168375023510654e-05, | |
| "loss": 2.7075, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 2.526315789473684, | |
| "grad_norm": 3.1251539583034615, | |
| "learning_rate": 9.163740135849823e-05, | |
| "loss": 2.6149, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 2.5325077399380804, | |
| "grad_norm": 3.4651982610153986, | |
| "learning_rate": 9.159093546620272e-05, | |
| "loss": 2.597, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.538699690402477, | |
| "grad_norm": 3.443446796385959, | |
| "learning_rate": 9.154435268880546e-05, | |
| "loss": 2.7328, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 2.544891640866873, | |
| "grad_norm": 3.5113324699487842, | |
| "learning_rate": 9.149765315722039e-05, | |
| "loss": 2.6076, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 2.5510835913312695, | |
| "grad_norm": 3.6507929025803088, | |
| "learning_rate": 9.145083700268954e-05, | |
| "loss": 2.5952, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 2.557275541795666, | |
| "grad_norm": 3.1280086170625503, | |
| "learning_rate": 9.140390435678272e-05, | |
| "loss": 2.6725, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 2.5634674922600618, | |
| "grad_norm": 3.4152513222205316, | |
| "learning_rate": 9.135685535139709e-05, | |
| "loss": 2.6314, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.569659442724458, | |
| "grad_norm": 3.278814596158421, | |
| "learning_rate": 9.130969011875685e-05, | |
| "loss": 2.7043, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 2.5758513931888545, | |
| "grad_norm": 3.4642520708537496, | |
| "learning_rate": 9.126240879141285e-05, | |
| "loss": 2.6676, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 2.582043343653251, | |
| "grad_norm": 3.2092964996317783, | |
| "learning_rate": 9.121501150224217e-05, | |
| "loss": 2.6003, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 2.588235294117647, | |
| "grad_norm": 3.983130708249009, | |
| "learning_rate": 9.116749838444778e-05, | |
| "loss": 2.8115, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 2.594427244582043, | |
| "grad_norm": 3.2303996809709057, | |
| "learning_rate": 9.111986957155823e-05, | |
| "loss": 2.7408, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.6006191950464395, | |
| "grad_norm": 3.021042071829146, | |
| "learning_rate": 9.107212519742714e-05, | |
| "loss": 2.6704, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 2.606811145510836, | |
| "grad_norm": 3.7177435258237534, | |
| "learning_rate": 9.102426539623295e-05, | |
| "loss": 2.6222, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 2.613003095975232, | |
| "grad_norm": 3.3065094436349667, | |
| "learning_rate": 9.097629030247846e-05, | |
| "loss": 2.6983, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 2.6191950464396285, | |
| "grad_norm": 3.3461082798054864, | |
| "learning_rate": 9.09282000509905e-05, | |
| "loss": 2.6419, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 2.625386996904025, | |
| "grad_norm": 3.3413036061467976, | |
| "learning_rate": 9.087999477691952e-05, | |
| "loss": 2.6388, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.6315789473684212, | |
| "grad_norm": 3.866138226599775, | |
| "learning_rate": 9.083167461573925e-05, | |
| "loss": 2.6888, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 2.6377708978328176, | |
| "grad_norm": 3.751760167233359, | |
| "learning_rate": 9.078323970324625e-05, | |
| "loss": 2.722, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 2.6439628482972135, | |
| "grad_norm": 3.2089862942574894, | |
| "learning_rate": 9.07346901755596e-05, | |
| "loss": 2.6437, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 2.65015479876161, | |
| "grad_norm": 3.0031534771552697, | |
| "learning_rate": 9.068602616912049e-05, | |
| "loss": 2.6457, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 2.656346749226006, | |
| "grad_norm": 2.998028740990232, | |
| "learning_rate": 9.063724782069183e-05, | |
| "loss": 2.5922, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.6625386996904026, | |
| "grad_norm": 3.2856296543776735, | |
| "learning_rate": 9.058835526735787e-05, | |
| "loss": 2.5476, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 2.6687306501547985, | |
| "grad_norm": 3.278728909273792, | |
| "learning_rate": 9.053934864652382e-05, | |
| "loss": 2.7063, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 2.674922600619195, | |
| "grad_norm": 3.203008246999124, | |
| "learning_rate": 9.049022809591546e-05, | |
| "loss": 2.7475, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 2.681114551083591, | |
| "grad_norm": 3.354224738161965, | |
| "learning_rate": 9.04409937535787e-05, | |
| "loss": 2.6243, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 2.6873065015479876, | |
| "grad_norm": 3.5804759078596056, | |
| "learning_rate": 9.039164575787938e-05, | |
| "loss": 2.5816, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.693498452012384, | |
| "grad_norm": 2.9452472701608543, | |
| "learning_rate": 9.034218424750259e-05, | |
| "loss": 2.6851, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 2.6996904024767803, | |
| "grad_norm": 3.196962334371782, | |
| "learning_rate": 9.029260936145251e-05, | |
| "loss": 2.5857, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 2.7058823529411766, | |
| "grad_norm": 2.905856705546016, | |
| "learning_rate": 9.0242921239052e-05, | |
| "loss": 2.6213, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 2.712074303405573, | |
| "grad_norm": 2.9055102854318506, | |
| "learning_rate": 9.019312001994202e-05, | |
| "loss": 2.535, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 2.718266253869969, | |
| "grad_norm": 3.1173866399821506, | |
| "learning_rate": 9.014320584408148e-05, | |
| "loss": 2.5721, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.7244582043343653, | |
| "grad_norm": 3.7789655683417287, | |
| "learning_rate": 9.00931788517467e-05, | |
| "loss": 2.6374, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 2.7306501547987616, | |
| "grad_norm": 3.4646023201290106, | |
| "learning_rate": 9.004303918353107e-05, | |
| "loss": 2.6499, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 2.736842105263158, | |
| "grad_norm": 3.9484421562322747, | |
| "learning_rate": 8.999278698034461e-05, | |
| "loss": 2.6948, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 2.7430340557275543, | |
| "grad_norm": 3.442553230839332, | |
| "learning_rate": 8.994242238341362e-05, | |
| "loss": 2.6144, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 2.7492260061919502, | |
| "grad_norm": 5.0516235581172895, | |
| "learning_rate": 8.989194553428028e-05, | |
| "loss": 2.6532, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.7554179566563466, | |
| "grad_norm": 3.68844887791005, | |
| "learning_rate": 8.984135657480219e-05, | |
| "loss": 2.5519, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 2.761609907120743, | |
| "grad_norm": 3.135950737705265, | |
| "learning_rate": 8.97906556471521e-05, | |
| "loss": 2.6005, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 2.7678018575851393, | |
| "grad_norm": 2.973619254046368, | |
| "learning_rate": 8.973984289381733e-05, | |
| "loss": 2.4656, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 2.7739938080495357, | |
| "grad_norm": 4.290896676580274, | |
| "learning_rate": 8.968891845759955e-05, | |
| "loss": 2.6052, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 2.780185758513932, | |
| "grad_norm": 3.8773305146838783, | |
| "learning_rate": 8.963788248161428e-05, | |
| "loss": 2.6004, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.7863777089783284, | |
| "grad_norm": 3.5246724411520507, | |
| "learning_rate": 8.958673510929046e-05, | |
| "loss": 2.7062, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 2.7925696594427247, | |
| "grad_norm": 3.68544856626367, | |
| "learning_rate": 8.953547648437016e-05, | |
| "loss": 2.7555, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 2.7987616099071206, | |
| "grad_norm": 4.282996144603401, | |
| "learning_rate": 8.948410675090805e-05, | |
| "loss": 2.7417, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 2.804953560371517, | |
| "grad_norm": 4.9262667481372135, | |
| "learning_rate": 8.943262605327112e-05, | |
| "loss": 2.6828, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 2.8111455108359134, | |
| "grad_norm": 3.51958635902412, | |
| "learning_rate": 8.938103453613813e-05, | |
| "loss": 2.7015, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.8173374613003097, | |
| "grad_norm": 3.003904355716457, | |
| "learning_rate": 8.932933234449934e-05, | |
| "loss": 2.6282, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 2.8235294117647056, | |
| "grad_norm": 2.8951986217352133, | |
| "learning_rate": 8.927751962365603e-05, | |
| "loss": 2.5677, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 2.829721362229102, | |
| "grad_norm": 2.973677750838087, | |
| "learning_rate": 8.922559651922006e-05, | |
| "loss": 2.6943, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 2.8359133126934983, | |
| "grad_norm": 3.923968948876088, | |
| "learning_rate": 8.917356317711358e-05, | |
| "loss": 2.8386, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 2.8421052631578947, | |
| "grad_norm": 3.748206431260409, | |
| "learning_rate": 8.912141974356853e-05, | |
| "loss": 2.5004, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.848297213622291, | |
| "grad_norm": 2.9589835275416165, | |
| "learning_rate": 8.90691663651262e-05, | |
| "loss": 2.7211, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 2.8544891640866874, | |
| "grad_norm": 4.413461022857995, | |
| "learning_rate": 8.90168031886369e-05, | |
| "loss": 2.7161, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.8606811145510838, | |
| "grad_norm": 3.8458500455119355, | |
| "learning_rate": 8.896433036125949e-05, | |
| "loss": 2.6386, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 2.86687306501548, | |
| "grad_norm": 3.5064727605205235, | |
| "learning_rate": 8.891174803046102e-05, | |
| "loss": 2.6769, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.873065015479876, | |
| "grad_norm": 3.3253407518893554, | |
| "learning_rate": 8.885905634401628e-05, | |
| "loss": 2.3765, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 2.8792569659442724, | |
| "grad_norm": 3.2901194346879232, | |
| "learning_rate": 8.880625545000734e-05, | |
| "loss": 2.6121, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 2.8854489164086687, | |
| "grad_norm": 3.8787003089594965, | |
| "learning_rate": 8.875334549682322e-05, | |
| "loss": 2.7162, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 2.891640866873065, | |
| "grad_norm": 3.1158601459237087, | |
| "learning_rate": 8.870032663315943e-05, | |
| "loss": 2.4268, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 2.8978328173374615, | |
| "grad_norm": 3.523227254158828, | |
| "learning_rate": 8.864719900801755e-05, | |
| "loss": 2.6866, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 2.9040247678018574, | |
| "grad_norm": 3.7947906426929294, | |
| "learning_rate": 8.859396277070483e-05, | |
| "loss": 2.5134, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.9102167182662537, | |
| "grad_norm": 3.5665946360922756, | |
| "learning_rate": 8.854061807083376e-05, | |
| "loss": 2.6598, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 2.91640866873065, | |
| "grad_norm": 3.6433600779737723, | |
| "learning_rate": 8.848716505832162e-05, | |
| "loss": 2.615, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.9226006191950464, | |
| "grad_norm": 3.280696174381033, | |
| "learning_rate": 8.843360388339011e-05, | |
| "loss": 2.6286, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 2.928792569659443, | |
| "grad_norm": 3.5271347403489295, | |
| "learning_rate": 8.837993469656489e-05, | |
| "loss": 2.6689, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 2.934984520123839, | |
| "grad_norm": 3.535085290280349, | |
| "learning_rate": 8.83261576486752e-05, | |
| "loss": 2.7654, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.9411764705882355, | |
| "grad_norm": 3.1024372707379664, | |
| "learning_rate": 8.827227289085339e-05, | |
| "loss": 2.5418, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 2.9473684210526314, | |
| "grad_norm": 3.0048339939826154, | |
| "learning_rate": 8.821828057453447e-05, | |
| "loss": 2.6055, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 2.9535603715170278, | |
| "grad_norm": 3.6042625802261625, | |
| "learning_rate": 8.816418085145582e-05, | |
| "loss": 2.8061, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 2.959752321981424, | |
| "grad_norm": 4.4277235133739605, | |
| "learning_rate": 8.810997387365656e-05, | |
| "loss": 2.8611, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 2.9659442724458205, | |
| "grad_norm": 3.672119841517933, | |
| "learning_rate": 8.805565979347734e-05, | |
| "loss": 2.781, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.972136222910217, | |
| "grad_norm": 4.2331486999864, | |
| "learning_rate": 8.800123876355976e-05, | |
| "loss": 2.6954, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 2.9783281733746128, | |
| "grad_norm": 3.6567653823539694, | |
| "learning_rate": 8.794671093684595e-05, | |
| "loss": 2.6517, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 2.984520123839009, | |
| "grad_norm": 3.4224822473176966, | |
| "learning_rate": 8.789207646657822e-05, | |
| "loss": 2.6759, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 2.9907120743034055, | |
| "grad_norm": 3.299869011062663, | |
| "learning_rate": 8.783733550629857e-05, | |
| "loss": 2.7011, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.996904024767802, | |
| "grad_norm": 3.2365085802605464, | |
| "learning_rate": 8.778248820984829e-05, | |
| "loss": 2.6494, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 3.2365085802605464, | |
| "learning_rate": 8.77275347313675e-05, | |
| "loss": 1.2508, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 3.0061919504643964, | |
| "grad_norm": 3.142785796812641, | |
| "learning_rate": 8.767247522529474e-05, | |
| "loss": 2.5231, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 3.0123839009287927, | |
| "grad_norm": 2.9912371857567117, | |
| "learning_rate": 8.761730984636648e-05, | |
| "loss": 2.4012, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 3.018575851393189, | |
| "grad_norm": 3.242699233358939, | |
| "learning_rate": 8.75620387496168e-05, | |
| "loss": 2.4209, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 3.024767801857585, | |
| "grad_norm": 3.4343945955583606, | |
| "learning_rate": 8.750666209037684e-05, | |
| "loss": 2.3436, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 3.0309597523219813, | |
| "grad_norm": 4.028979174625844, | |
| "learning_rate": 8.745118002427439e-05, | |
| "loss": 2.3537, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 3.0371517027863777, | |
| "grad_norm": 3.419844103554572, | |
| "learning_rate": 8.739559270723353e-05, | |
| "loss": 2.3517, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 3.043343653250774, | |
| "grad_norm": 4.079075872820777, | |
| "learning_rate": 8.733990029547407e-05, | |
| "loss": 2.4506, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 3.0495356037151704, | |
| "grad_norm": 4.453433315359627, | |
| "learning_rate": 8.728410294551123e-05, | |
| "loss": 2.4242, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 3.0557275541795668, | |
| "grad_norm": 4.185230535033415, | |
| "learning_rate": 8.72282008141551e-05, | |
| "loss": 2.4632, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 3.0619195046439627, | |
| "grad_norm": 4.434685158118041, | |
| "learning_rate": 8.717219405851025e-05, | |
| "loss": 2.3609, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 3.068111455108359, | |
| "grad_norm": 4.265164936477565, | |
| "learning_rate": 8.71160828359753e-05, | |
| "loss": 2.4868, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 3.0743034055727554, | |
| "grad_norm": 4.115197176895919, | |
| "learning_rate": 8.705986730424243e-05, | |
| "loss": 2.4306, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 3.0804953560371517, | |
| "grad_norm": 3.6219070451775246, | |
| "learning_rate": 8.7003547621297e-05, | |
| "loss": 2.3345, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 3.086687306501548, | |
| "grad_norm": 3.260204917718609, | |
| "learning_rate": 8.694712394541705e-05, | |
| "loss": 2.4696, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.0928792569659445, | |
| "grad_norm": 3.661355382479066, | |
| "learning_rate": 8.689059643517286e-05, | |
| "loss": 2.482, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 3.0990712074303404, | |
| "grad_norm": 4.094138838211809, | |
| "learning_rate": 8.683396524942656e-05, | |
| "loss": 2.3903, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 3.1052631578947367, | |
| "grad_norm": 3.970245486136833, | |
| "learning_rate": 8.677723054733162e-05, | |
| "loss": 2.3571, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 3.111455108359133, | |
| "grad_norm": 4.532439692668108, | |
| "learning_rate": 8.672039248833244e-05, | |
| "loss": 2.3878, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 3.1176470588235294, | |
| "grad_norm": 4.033683253050144, | |
| "learning_rate": 8.666345123216386e-05, | |
| "loss": 2.4167, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 3.123839009287926, | |
| "grad_norm": 3.975201751116555, | |
| "learning_rate": 8.66064069388508e-05, | |
| "loss": 2.3517, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 3.130030959752322, | |
| "grad_norm": 3.3788260641137975, | |
| "learning_rate": 8.654925976870766e-05, | |
| "loss": 2.3212, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 3.136222910216718, | |
| "grad_norm": 3.9584592514363166, | |
| "learning_rate": 8.649200988233806e-05, | |
| "loss": 2.4028, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 3.1424148606811144, | |
| "grad_norm": 3.689388657105196, | |
| "learning_rate": 8.643465744063419e-05, | |
| "loss": 2.4571, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 3.1486068111455108, | |
| "grad_norm": 3.605974543644076, | |
| "learning_rate": 8.637720260477656e-05, | |
| "loss": 2.4103, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 3.154798761609907, | |
| "grad_norm": 3.8794947630259613, | |
| "learning_rate": 8.631964553623337e-05, | |
| "loss": 2.3767, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 3.1609907120743035, | |
| "grad_norm": 3.4775368857027193, | |
| "learning_rate": 8.626198639676014e-05, | |
| "loss": 2.3674, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 3.1671826625387, | |
| "grad_norm": 3.6633952894093467, | |
| "learning_rate": 8.620422534839925e-05, | |
| "loss": 2.4016, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 3.173374613003096, | |
| "grad_norm": 3.8137471386364568, | |
| "learning_rate": 8.614636255347952e-05, | |
| "loss": 2.3726, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 3.179566563467492, | |
| "grad_norm": 3.7359331492842753, | |
| "learning_rate": 8.608839817461564e-05, | |
| "loss": 2.362, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 3.1857585139318885, | |
| "grad_norm": 3.7777692904063915, | |
| "learning_rate": 8.603033237470784e-05, | |
| "loss": 2.403, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 3.191950464396285, | |
| "grad_norm": 4.18451321680891, | |
| "learning_rate": 8.597216531694137e-05, | |
| "loss": 2.3073, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 3.198142414860681, | |
| "grad_norm": 4.067292793649438, | |
| "learning_rate": 8.591389716478604e-05, | |
| "loss": 2.3289, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 3.2043343653250775, | |
| "grad_norm": 3.934930338281878, | |
| "learning_rate": 8.585552808199576e-05, | |
| "loss": 2.4575, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 3.2105263157894735, | |
| "grad_norm": 4.657392240081009, | |
| "learning_rate": 8.579705823260813e-05, | |
| "loss": 2.4673, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 3.21671826625387, | |
| "grad_norm": 3.684852355859821, | |
| "learning_rate": 8.573848778094389e-05, | |
| "loss": 2.4688, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 3.222910216718266, | |
| "grad_norm": 4.011742312465073, | |
| "learning_rate": 8.567981689160654e-05, | |
| "loss": 2.3442, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 3.2291021671826625, | |
| "grad_norm": 3.661871017054601, | |
| "learning_rate": 8.562104572948185e-05, | |
| "loss": 2.3394, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 3.235294117647059, | |
| "grad_norm": 4.123772593484842, | |
| "learning_rate": 8.556217445973735e-05, | |
| "loss": 2.571, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 3.2414860681114552, | |
| "grad_norm": 3.938828313810375, | |
| "learning_rate": 8.550320324782197e-05, | |
| "loss": 2.3857, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 3.2476780185758516, | |
| "grad_norm": 3.625870077036559, | |
| "learning_rate": 8.544413225946545e-05, | |
| "loss": 2.3553, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 3.2538699690402475, | |
| "grad_norm": 3.7489633832482023, | |
| "learning_rate": 8.538496166067798e-05, | |
| "loss": 2.402, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 3.260061919504644, | |
| "grad_norm": 3.8589651067740354, | |
| "learning_rate": 8.532569161774968e-05, | |
| "loss": 2.3985, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 3.26625386996904, | |
| "grad_norm": 5.456327193741036, | |
| "learning_rate": 8.52663222972501e-05, | |
| "loss": 2.5039, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 3.2724458204334366, | |
| "grad_norm": 4.052545897678689, | |
| "learning_rate": 8.520685386602792e-05, | |
| "loss": 2.4497, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 3.278637770897833, | |
| "grad_norm": 3.5992235741605474, | |
| "learning_rate": 8.514728649121018e-05, | |
| "loss": 2.3914, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 3.2848297213622293, | |
| "grad_norm": 3.432155774584645, | |
| "learning_rate": 8.50876203402021e-05, | |
| "loss": 2.386, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 3.291021671826625, | |
| "grad_norm": 3.8557093863718346, | |
| "learning_rate": 8.502785558068648e-05, | |
| "loss": 2.3473, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 3.2972136222910216, | |
| "grad_norm": 4.9666307995715915, | |
| "learning_rate": 8.496799238062325e-05, | |
| "loss": 2.4944, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 3.303405572755418, | |
| "grad_norm": 3.917374781482906, | |
| "learning_rate": 8.490803090824895e-05, | |
| "loss": 2.4824, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 3.3095975232198143, | |
| "grad_norm": 3.969181260137565, | |
| "learning_rate": 8.484797133207633e-05, | |
| "loss": 2.374, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 3.3157894736842106, | |
| "grad_norm": 3.7224610182620217, | |
| "learning_rate": 8.478781382089387e-05, | |
| "loss": 2.5348, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 3.321981424148607, | |
| "grad_norm": 4.1424980914113165, | |
| "learning_rate": 8.472755854376521e-05, | |
| "loss": 2.3643, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 3.3281733746130033, | |
| "grad_norm": 3.722762538292435, | |
| "learning_rate": 8.466720567002885e-05, | |
| "loss": 2.384, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 3.3343653250773992, | |
| "grad_norm": 3.8280649398435034, | |
| "learning_rate": 8.460675536929748e-05, | |
| "loss": 2.3841, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 3.3405572755417956, | |
| "grad_norm": 4.837273639103312, | |
| "learning_rate": 8.454620781145762e-05, | |
| "loss": 2.5264, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 3.346749226006192, | |
| "grad_norm": 4.19293715559688, | |
| "learning_rate": 8.448556316666912e-05, | |
| "loss": 2.3774, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 3.3529411764705883, | |
| "grad_norm": 3.5839163084269927, | |
| "learning_rate": 8.442482160536469e-05, | |
| "loss": 2.3416, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 3.3591331269349847, | |
| "grad_norm": 4.333258685944541, | |
| "learning_rate": 8.436398329824942e-05, | |
| "loss": 2.4427, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 3.3653250773993806, | |
| "grad_norm": 3.478712498678592, | |
| "learning_rate": 8.430304841630023e-05, | |
| "loss": 2.4323, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 3.371517027863777, | |
| "grad_norm": 4.32087845434783, | |
| "learning_rate": 8.424201713076553e-05, | |
| "loss": 2.3134, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 3.3777089783281733, | |
| "grad_norm": 3.9274514954655815, | |
| "learning_rate": 8.41808896131646e-05, | |
| "loss": 2.398, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 3.3839009287925697, | |
| "grad_norm": 4.053745527455987, | |
| "learning_rate": 8.411966603528716e-05, | |
| "loss": 2.3076, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 3.390092879256966, | |
| "grad_norm": 4.460782410782793, | |
| "learning_rate": 8.405834656919295e-05, | |
| "loss": 2.3321, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 3.3962848297213624, | |
| "grad_norm": 3.9192879825788647, | |
| "learning_rate": 8.399693138721116e-05, | |
| "loss": 2.3456, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 3.4024767801857587, | |
| "grad_norm": 4.239580955809909, | |
| "learning_rate": 8.393542066193994e-05, | |
| "loss": 2.4856, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 3.4086687306501546, | |
| "grad_norm": 3.707684771229425, | |
| "learning_rate": 8.387381456624599e-05, | |
| "loss": 2.3784, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 3.414860681114551, | |
| "grad_norm": 4.002189613200214, | |
| "learning_rate": 8.381211327326402e-05, | |
| "loss": 2.3785, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 3.4210526315789473, | |
| "grad_norm": 4.0245735243415535, | |
| "learning_rate": 8.375031695639631e-05, | |
| "loss": 2.4096, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 3.4272445820433437, | |
| "grad_norm": 3.570204013129324, | |
| "learning_rate": 8.368842578931214e-05, | |
| "loss": 2.4111, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 3.43343653250774, | |
| "grad_norm": 4.453628574002878, | |
| "learning_rate": 8.362643994594739e-05, | |
| "loss": 2.5287, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 3.4396284829721364, | |
| "grad_norm": 3.739785978737487, | |
| "learning_rate": 8.356435960050397e-05, | |
| "loss": 2.3796, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 3.4458204334365323, | |
| "grad_norm": 3.9547310234690323, | |
| "learning_rate": 8.350218492744944e-05, | |
| "loss": 2.3646, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 3.4520123839009287, | |
| "grad_norm": 3.5013107085457618, | |
| "learning_rate": 8.343991610151641e-05, | |
| "loss": 2.4255, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 3.458204334365325, | |
| "grad_norm": 3.7666231303992412, | |
| "learning_rate": 8.337755329770207e-05, | |
| "loss": 2.5381, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 3.4643962848297214, | |
| "grad_norm": 4.382382117785664, | |
| "learning_rate": 8.331509669126778e-05, | |
| "loss": 2.4004, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 3.4705882352941178, | |
| "grad_norm": 3.7395619287804345, | |
| "learning_rate": 8.325254645773848e-05, | |
| "loss": 2.3416, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 3.476780185758514, | |
| "grad_norm": 4.3553154299552705, | |
| "learning_rate": 8.318990277290224e-05, | |
| "loss": 2.3566, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 3.4829721362229105, | |
| "grad_norm": 3.8059837871427935, | |
| "learning_rate": 8.312716581280976e-05, | |
| "loss": 2.517, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 3.4891640866873064, | |
| "grad_norm": 3.784476713847252, | |
| "learning_rate": 8.306433575377388e-05, | |
| "loss": 2.4462, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 3.4953560371517027, | |
| "grad_norm": 3.628508905744923, | |
| "learning_rate": 8.30014127723691e-05, | |
| "loss": 2.4279, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 3.501547987616099, | |
| "grad_norm": 3.677250997486939, | |
| "learning_rate": 8.293839704543104e-05, | |
| "loss": 2.3862, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 3.5077399380804954, | |
| "grad_norm": 3.2429432477815503, | |
| "learning_rate": 8.287528875005595e-05, | |
| "loss": 2.5449, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 3.513931888544892, | |
| "grad_norm": 3.7827903049802085, | |
| "learning_rate": 8.281208806360027e-05, | |
| "loss": 2.5051, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 3.5201238390092877, | |
| "grad_norm": 3.7378668516720706, | |
| "learning_rate": 8.274879516368007e-05, | |
| "loss": 2.4024, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 3.526315789473684, | |
| "grad_norm": 3.4374901729582525, | |
| "learning_rate": 8.268541022817058e-05, | |
| "loss": 2.4265, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 3.5325077399380804, | |
| "grad_norm": 3.294348302203455, | |
| "learning_rate": 8.262193343520568e-05, | |
| "loss": 2.3196, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 3.538699690402477, | |
| "grad_norm": 3.390087246250273, | |
| "learning_rate": 8.255836496317739e-05, | |
| "loss": 2.33, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 3.544891640866873, | |
| "grad_norm": 3.6843958177256524, | |
| "learning_rate": 8.24947049907354e-05, | |
| "loss": 2.4453, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 3.5510835913312695, | |
| "grad_norm": 3.7638544633311866, | |
| "learning_rate": 8.243095369678652e-05, | |
| "loss": 2.359, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 3.557275541795666, | |
| "grad_norm": 3.774344465762464, | |
| "learning_rate": 8.236711126049427e-05, | |
| "loss": 2.4222, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 3.5634674922600618, | |
| "grad_norm": 3.671051399878454, | |
| "learning_rate": 8.230317786127821e-05, | |
| "loss": 2.5869, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 3.569659442724458, | |
| "grad_norm": 3.2906514603638803, | |
| "learning_rate": 8.223915367881364e-05, | |
| "loss": 2.3977, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 3.5758513931888545, | |
| "grad_norm": 3.4347910639301538, | |
| "learning_rate": 8.217503889303089e-05, | |
| "loss": 2.5456, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 3.582043343653251, | |
| "grad_norm": 3.800848071745359, | |
| "learning_rate": 8.211083368411503e-05, | |
| "loss": 2.4824, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 3.588235294117647, | |
| "grad_norm": 3.6359215248511174, | |
| "learning_rate": 8.204653823250516e-05, | |
| "loss": 2.3716, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 3.594427244582043, | |
| "grad_norm": 4.226736841356074, | |
| "learning_rate": 8.198215271889405e-05, | |
| "loss": 2.4543, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 3.6006191950464395, | |
| "grad_norm": 3.6133621782855165, | |
| "learning_rate": 8.191767732422753e-05, | |
| "loss": 2.4124, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 3.606811145510836, | |
| "grad_norm": 3.8540886045466007, | |
| "learning_rate": 8.185311222970408e-05, | |
| "loss": 2.3871, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 3.613003095975232, | |
| "grad_norm": 3.8474432290665472, | |
| "learning_rate": 8.178845761677421e-05, | |
| "loss": 2.4103, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 3.6191950464396285, | |
| "grad_norm": 4.626307948711146, | |
| "learning_rate": 8.172371366714005e-05, | |
| "loss": 2.4615, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 3.625386996904025, | |
| "grad_norm": 4.37767462664029, | |
| "learning_rate": 8.165888056275478e-05, | |
| "loss": 2.2778, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 3.6315789473684212, | |
| "grad_norm": 3.2021961110763173, | |
| "learning_rate": 8.159395848582214e-05, | |
| "loss": 2.3953, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 3.6377708978328176, | |
| "grad_norm": 3.617124402696774, | |
| "learning_rate": 8.152894761879593e-05, | |
| "loss": 2.3027, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 3.6439628482972135, | |
| "grad_norm": 4.484054406918155, | |
| "learning_rate": 8.146384814437942e-05, | |
| "loss": 2.5096, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 3.65015479876161, | |
| "grad_norm": 4.108253074688598, | |
| "learning_rate": 8.139866024552501e-05, | |
| "loss": 2.3963, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 3.656346749226006, | |
| "grad_norm": 3.884001309713194, | |
| "learning_rate": 8.13333841054335e-05, | |
| "loss": 2.3743, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 3.6625386996904026, | |
| "grad_norm": 3.2469329314047286, | |
| "learning_rate": 8.126801990755371e-05, | |
| "loss": 2.3849, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 3.6687306501547985, | |
| "grad_norm": 3.680814463784443, | |
| "learning_rate": 8.120256783558193e-05, | |
| "loss": 2.4079, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 3.674922600619195, | |
| "grad_norm": 3.7527216066173783, | |
| "learning_rate": 8.113702807346146e-05, | |
| "loss": 2.4133, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 3.681114551083591, | |
| "grad_norm": 3.9183133821763816, | |
| "learning_rate": 8.107140080538196e-05, | |
| "loss": 2.3375, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 3.6873065015479876, | |
| "grad_norm": 3.5880106416119575, | |
| "learning_rate": 8.100568621577907e-05, | |
| "loss": 2.4574, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 3.693498452012384, | |
| "grad_norm": 3.8184548274334236, | |
| "learning_rate": 8.093988448933379e-05, | |
| "loss": 2.3915, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 3.6996904024767803, | |
| "grad_norm": 4.008539397400194, | |
| "learning_rate": 8.087399581097204e-05, | |
| "loss": 2.4552, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 3.7058823529411766, | |
| "grad_norm": 3.611810991037467, | |
| "learning_rate": 8.080802036586408e-05, | |
| "loss": 2.5122, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 3.712074303405573, | |
| "grad_norm": 3.6007336687193416, | |
| "learning_rate": 8.074195833942404e-05, | |
| "loss": 2.2904, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 3.718266253869969, | |
| "grad_norm": 3.661330589005197, | |
| "learning_rate": 8.067580991730939e-05, | |
| "loss": 2.5167, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 3.7244582043343653, | |
| "grad_norm": 3.701053256190786, | |
| "learning_rate": 8.060957528542032e-05, | |
| "loss": 2.3615, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 3.7306501547987616, | |
| "grad_norm": 3.560815000825057, | |
| "learning_rate": 8.054325462989938e-05, | |
| "loss": 2.4362, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 3.736842105263158, | |
| "grad_norm": 3.6833443377659254, | |
| "learning_rate": 8.047684813713086e-05, | |
| "loss": 2.3669, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 3.7430340557275543, | |
| "grad_norm": 3.6944003385921436, | |
| "learning_rate": 8.041035599374026e-05, | |
| "loss": 2.39, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 3.7492260061919502, | |
| "grad_norm": 3.722950982457078, | |
| "learning_rate": 8.034377838659379e-05, | |
| "loss": 2.4393, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 3.7554179566563466, | |
| "grad_norm": 3.391406440568772, | |
| "learning_rate": 8.027711550279788e-05, | |
| "loss": 2.4148, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 3.761609907120743, | |
| "grad_norm": 4.116410869786648, | |
| "learning_rate": 8.021036752969859e-05, | |
| "loss": 2.4822, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 3.7678018575851393, | |
| "grad_norm": 3.806836392232153, | |
| "learning_rate": 8.014353465488109e-05, | |
| "loss": 2.4317, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 3.7739938080495357, | |
| "grad_norm": 3.7907290625778396, | |
| "learning_rate": 8.007661706616918e-05, | |
| "loss": 2.5755, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 3.780185758513932, | |
| "grad_norm": 3.857390237638102, | |
| "learning_rate": 8.000961495162475e-05, | |
| "loss": 2.5528, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 3.7863777089783284, | |
| "grad_norm": 3.691317382868572, | |
| "learning_rate": 7.99425284995472e-05, | |
| "loss": 2.3678, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 3.7925696594427247, | |
| "grad_norm": 4.22434396093653, | |
| "learning_rate": 7.987535789847297e-05, | |
| "loss": 2.375, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 3.7987616099071206, | |
| "grad_norm": 4.203684424148445, | |
| "learning_rate": 7.980810333717499e-05, | |
| "loss": 2.3522, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 3.804953560371517, | |
| "grad_norm": 3.5184765286157664, | |
| "learning_rate": 7.974076500466215e-05, | |
| "loss": 2.498, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 3.8111455108359134, | |
| "grad_norm": 4.091552173859175, | |
| "learning_rate": 7.967334309017875e-05, | |
| "loss": 2.5066, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 3.8173374613003097, | |
| "grad_norm": 3.7460766300362502, | |
| "learning_rate": 7.960583778320398e-05, | |
| "loss": 2.4016, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 3.8235294117647056, | |
| "grad_norm": 3.7076259071585373, | |
| "learning_rate": 7.953824927345145e-05, | |
| "loss": 2.4761, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 3.829721362229102, | |
| "grad_norm": 3.627102527680507, | |
| "learning_rate": 7.947057775086852e-05, | |
| "loss": 2.418, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 3.8359133126934983, | |
| "grad_norm": 3.596902423154603, | |
| "learning_rate": 7.940282340563585e-05, | |
| "loss": 2.3954, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 3.8421052631578947, | |
| "grad_norm": 3.7747227866779784, | |
| "learning_rate": 7.933498642816697e-05, | |
| "loss": 2.4548, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 3.848297213622291, | |
| "grad_norm": 4.023077395134154, | |
| "learning_rate": 7.926706700910749e-05, | |
| "loss": 2.4369, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 3.8544891640866874, | |
| "grad_norm": 4.038169577952473, | |
| "learning_rate": 7.91990653393348e-05, | |
| "loss": 2.5278, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 3.8606811145510838, | |
| "grad_norm": 3.8358270442024462, | |
| "learning_rate": 7.913098160995742e-05, | |
| "loss": 2.4348, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 3.86687306501548, | |
| "grad_norm": 4.151270695289064, | |
| "learning_rate": 7.906281601231448e-05, | |
| "loss": 2.4865, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 3.873065015479876, | |
| "grad_norm": 3.532376841609081, | |
| "learning_rate": 7.89945687379752e-05, | |
| "loss": 2.4158, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 3.8792569659442724, | |
| "grad_norm": 3.386589106491462, | |
| "learning_rate": 7.892623997873832e-05, | |
| "loss": 2.4313, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 3.8854489164086687, | |
| "grad_norm": 3.2592172663637657, | |
| "learning_rate": 7.885782992663162e-05, | |
| "loss": 2.3851, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 3.891640866873065, | |
| "grad_norm": 3.83030056141294, | |
| "learning_rate": 7.87893387739113e-05, | |
| "loss": 2.3898, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 3.8978328173374615, | |
| "grad_norm": 3.5379543160228244, | |
| "learning_rate": 7.87207667130615e-05, | |
| "loss": 2.3416, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 3.9040247678018574, | |
| "grad_norm": 3.9003164430953197, | |
| "learning_rate": 7.865211393679373e-05, | |
| "loss": 2.3958, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 3.9102167182662537, | |
| "grad_norm": 4.045214436387836, | |
| "learning_rate": 7.858338063804638e-05, | |
| "loss": 2.3821, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 3.91640866873065, | |
| "grad_norm": 3.772302312812738, | |
| "learning_rate": 7.851456700998405e-05, | |
| "loss": 2.4092, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 3.9226006191950464, | |
| "grad_norm": 4.002774187817698, | |
| "learning_rate": 7.844567324599719e-05, | |
| "loss": 2.3969, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 3.928792569659443, | |
| "grad_norm": 4.175141584948075, | |
| "learning_rate": 7.83766995397014e-05, | |
| "loss": 2.4838, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 3.934984520123839, | |
| "grad_norm": 3.9596695224737126, | |
| "learning_rate": 7.830764608493697e-05, | |
| "loss": 2.4405, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 3.9411764705882355, | |
| "grad_norm": 3.3284358322965133, | |
| "learning_rate": 7.823851307576828e-05, | |
| "loss": 2.3657, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 3.9473684210526314, | |
| "grad_norm": 3.2833880729162614, | |
| "learning_rate": 7.816930070648334e-05, | |
| "loss": 2.4343, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 3.9535603715170278, | |
| "grad_norm": 3.5684648107333707, | |
| "learning_rate": 7.810000917159315e-05, | |
| "loss": 2.4331, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 3.959752321981424, | |
| "grad_norm": 3.447912535908917, | |
| "learning_rate": 7.803063866583118e-05, | |
| "loss": 2.3603, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 3.9659442724458205, | |
| "grad_norm": 3.7491120649608445, | |
| "learning_rate": 7.796118938415289e-05, | |
| "loss": 2.5134, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 3.972136222910217, | |
| "grad_norm": 3.821071728011824, | |
| "learning_rate": 7.789166152173509e-05, | |
| "loss": 2.2568, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 3.9783281733746128, | |
| "grad_norm": 3.4413072268850855, | |
| "learning_rate": 7.782205527397541e-05, | |
| "loss": 2.3745, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 3.984520123839009, | |
| "grad_norm": 3.667189769208347, | |
| "learning_rate": 7.775237083649181e-05, | |
| "loss": 2.5052, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 3.9907120743034055, | |
| "grad_norm": 4.100509084367406, | |
| "learning_rate": 7.7682608405122e-05, | |
| "loss": 2.4049, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 3.996904024767802, | |
| "grad_norm": 3.6578604042215113, | |
| "learning_rate": 7.761276817592282e-05, | |
| "loss": 2.3555, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 3.5238815757576116, | |
| "learning_rate": 7.75428503451698e-05, | |
| "loss": 1.1803, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 4.006191950464396, | |
| "grad_norm": 3.1639437658042695, | |
| "learning_rate": 7.747285510935654e-05, | |
| "loss": 2.1137, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 4.012383900928793, | |
| "grad_norm": 3.457768945384432, | |
| "learning_rate": 7.74027826651942e-05, | |
| "loss": 2.1498, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 4.018575851393189, | |
| "grad_norm": 3.70029048006138, | |
| "learning_rate": 7.733263320961085e-05, | |
| "loss": 2.1948, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 4.024767801857585, | |
| "grad_norm": 4.126029827412395, | |
| "learning_rate": 7.726240693975111e-05, | |
| "loss": 2.15, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 4.030959752321982, | |
| "grad_norm": 5.558797321581609, | |
| "learning_rate": 7.719210405297537e-05, | |
| "loss": 2.1257, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 4.037151702786378, | |
| "grad_norm": 4.85006309138657, | |
| "learning_rate": 7.712172474685935e-05, | |
| "loss": 2.1376, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 4.043343653250774, | |
| "grad_norm": 6.4395438813022805, | |
| "learning_rate": 7.705126921919359e-05, | |
| "loss": 2.1444, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 4.04953560371517, | |
| "grad_norm": 6.041865846725888, | |
| "learning_rate": 7.698073766798281e-05, | |
| "loss": 2.0954, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 4.055727554179566, | |
| "grad_norm": 4.438802002995133, | |
| "learning_rate": 7.691013029144536e-05, | |
| "loss": 2.112, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 4.061919504643963, | |
| "grad_norm": 4.404903290145634, | |
| "learning_rate": 7.683944728801273e-05, | |
| "loss": 2.0828, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 4.068111455108359, | |
| "grad_norm": 4.778267563170224, | |
| "learning_rate": 7.676868885632893e-05, | |
| "loss": 2.181, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 4.074303405572755, | |
| "grad_norm": 4.459434975412655, | |
| "learning_rate": 7.669785519524993e-05, | |
| "loss": 2.0955, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 4.080495356037152, | |
| "grad_norm": 4.076696574359944, | |
| "learning_rate": 7.662694650384315e-05, | |
| "loss": 2.1908, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 4.086687306501548, | |
| "grad_norm": 4.145823898422436, | |
| "learning_rate": 7.655596298138683e-05, | |
| "loss": 2.1311, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 4.0928792569659445, | |
| "grad_norm": 4.557226017811323, | |
| "learning_rate": 7.648490482736959e-05, | |
| "loss": 2.1075, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 4.099071207430341, | |
| "grad_norm": 5.560413558024817, | |
| "learning_rate": 7.641377224148971e-05, | |
| "loss": 2.1539, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 4.105263157894737, | |
| "grad_norm": 4.87783371849139, | |
| "learning_rate": 7.634256542365468e-05, | |
| "loss": 2.1402, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 4.1114551083591335, | |
| "grad_norm": 5.246452612368868, | |
| "learning_rate": 7.62712845739806e-05, | |
| "loss": 2.1642, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 4.117647058823529, | |
| "grad_norm": 4.779665362725869, | |
| "learning_rate": 7.619992989279167e-05, | |
| "loss": 2.0213, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 4.123839009287925, | |
| "grad_norm": 4.4538999563034425, | |
| "learning_rate": 7.61285015806195e-05, | |
| "loss": 2.086, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 4.130030959752322, | |
| "grad_norm": 4.834496011319403, | |
| "learning_rate": 7.605699983820269e-05, | |
| "loss": 2.1693, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 4.136222910216718, | |
| "grad_norm": 3.9702391292351953, | |
| "learning_rate": 7.598542486648623e-05, | |
| "loss": 2.033, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 4.142414860681114, | |
| "grad_norm": 4.5764661716020205, | |
| "learning_rate": 7.591377686662081e-05, | |
| "loss": 2.0898, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 4.148606811145511, | |
| "grad_norm": 4.346261805320313, | |
| "learning_rate": 7.584205603996246e-05, | |
| "loss": 2.1802, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 4.154798761609907, | |
| "grad_norm": 5.188414338106125, | |
| "learning_rate": 7.577026258807181e-05, | |
| "loss": 2.1883, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 4.1609907120743035, | |
| "grad_norm": 4.5978526365501695, | |
| "learning_rate": 7.569839671271359e-05, | |
| "loss": 1.9735, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 4.1671826625387, | |
| "grad_norm": 4.487198124539332, | |
| "learning_rate": 7.562645861585616e-05, | |
| "loss": 1.9817, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 4.173374613003096, | |
| "grad_norm": 4.7334745708324855, | |
| "learning_rate": 7.555444849967073e-05, | |
| "loss": 2.0571, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 4.179566563467493, | |
| "grad_norm": 5.372008408502007, | |
| "learning_rate": 7.548236656653095e-05, | |
| "loss": 2.1141, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 4.185758513931889, | |
| "grad_norm": 5.144798634366983, | |
| "learning_rate": 7.541021301901234e-05, | |
| "loss": 2.2067, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 4.191950464396285, | |
| "grad_norm": 4.924453474716802, | |
| "learning_rate": 7.533798805989164e-05, | |
| "loss": 2.0421, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 4.198142414860681, | |
| "grad_norm": 4.991735540662648, | |
| "learning_rate": 7.526569189214627e-05, | |
| "loss": 1.9691, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 4.204334365325077, | |
| "grad_norm": 5.092517221614254, | |
| "learning_rate": 7.519332471895384e-05, | |
| "loss": 2.0528, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 4.2105263157894735, | |
| "grad_norm": 5.268399238043823, | |
| "learning_rate": 7.512088674369143e-05, | |
| "loss": 2.112, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 4.21671826625387, | |
| "grad_norm": 4.514936671947266, | |
| "learning_rate": 7.504837816993514e-05, | |
| "loss": 2.1414, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 4.222910216718266, | |
| "grad_norm": 4.674392659937627, | |
| "learning_rate": 7.497579920145945e-05, | |
| "loss": 2.2437, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 4.2291021671826625, | |
| "grad_norm": 4.143299960033866, | |
| "learning_rate": 7.490315004223672e-05, | |
| "loss": 2.1199, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 4.235294117647059, | |
| "grad_norm": 4.38729868716052, | |
| "learning_rate": 7.483043089643653e-05, | |
| "loss": 2.167, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 4.241486068111455, | |
| "grad_norm": 4.39687107676024, | |
| "learning_rate": 7.475764196842516e-05, | |
| "loss": 2.1273, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 4.247678018575852, | |
| "grad_norm": 5.0233257992601335, | |
| "learning_rate": 7.468478346276499e-05, | |
| "loss": 2.0826, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 4.253869969040248, | |
| "grad_norm": 12.022154855461501, | |
| "learning_rate": 7.4611855584214e-05, | |
| "loss": 2.1096, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 4.260061919504644, | |
| "grad_norm": 4.873247412293039, | |
| "learning_rate": 7.453885853772503e-05, | |
| "loss": 2.2862, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 4.266253869969041, | |
| "grad_norm": 4.969070770475527, | |
| "learning_rate": 7.446579252844535e-05, | |
| "loss": 2.0693, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 4.272445820433436, | |
| "grad_norm": 5.014674488114116, | |
| "learning_rate": 7.439265776171611e-05, | |
| "loss": 2.0325, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 4.2786377708978325, | |
| "grad_norm": 4.693562745355228, | |
| "learning_rate": 7.431945444307157e-05, | |
| "loss": 2.0993, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 4.284829721362229, | |
| "grad_norm": 5.091836740664018, | |
| "learning_rate": 7.424618277823873e-05, | |
| "loss": 2.1701, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 4.291021671826625, | |
| "grad_norm": 4.825237510682655, | |
| "learning_rate": 7.417284297313664e-05, | |
| "loss": 2.2776, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 4.2972136222910216, | |
| "grad_norm": 4.748682210483545, | |
| "learning_rate": 7.409943523387586e-05, | |
| "loss": 2.1297, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 4.303405572755418, | |
| "grad_norm": 4.0103272586261065, | |
| "learning_rate": 7.402595976675785e-05, | |
| "loss": 2.1662, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 4.309597523219814, | |
| "grad_norm": 4.275615365901843, | |
| "learning_rate": 7.395241677827438e-05, | |
| "loss": 2.2649, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 4.315789473684211, | |
| "grad_norm": 4.569190967353905, | |
| "learning_rate": 7.387880647510709e-05, | |
| "loss": 1.9498, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 4.321981424148607, | |
| "grad_norm": 3.999971515413252, | |
| "learning_rate": 7.380512906412667e-05, | |
| "loss": 2.0736, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 4.328173374613003, | |
| "grad_norm": 4.799921274732086, | |
| "learning_rate": 7.373138475239249e-05, | |
| "loss": 2.1249, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 4.3343653250774, | |
| "grad_norm": 5.252410492234698, | |
| "learning_rate": 7.365757374715187e-05, | |
| "loss": 2.1682, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 4.340557275541796, | |
| "grad_norm": 4.680441025779536, | |
| "learning_rate": 7.358369625583965e-05, | |
| "loss": 2.1072, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 4.346749226006192, | |
| "grad_norm": 5.346243143190861, | |
| "learning_rate": 7.350975248607743e-05, | |
| "loss": 2.1245, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 4.352941176470588, | |
| "grad_norm": 5.235167621935557, | |
| "learning_rate": 7.34357426456731e-05, | |
| "loss": 1.9862, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 4.359133126934984, | |
| "grad_norm": 4.569103725568221, | |
| "learning_rate": 7.336166694262028e-05, | |
| "loss": 2.0838, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 4.365325077399381, | |
| "grad_norm": 5.117945643586851, | |
| "learning_rate": 7.328752558509761e-05, | |
| "loss": 2.2451, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 4.371517027863777, | |
| "grad_norm": 5.125143550125818, | |
| "learning_rate": 7.321331878146834e-05, | |
| "loss": 2.1555, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 4.377708978328173, | |
| "grad_norm": 4.483992154106004, | |
| "learning_rate": 7.313904674027954e-05, | |
| "loss": 2.0191, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 4.38390092879257, | |
| "grad_norm": 4.574311355191686, | |
| "learning_rate": 7.306470967026169e-05, | |
| "loss": 2.2059, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 4.390092879256966, | |
| "grad_norm": 4.214723810324921, | |
| "learning_rate": 7.299030778032799e-05, | |
| "loss": 2.1897, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 4.396284829721362, | |
| "grad_norm": 4.122839871149707, | |
| "learning_rate": 7.291584127957384e-05, | |
| "loss": 2.2548, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 4.402476780185759, | |
| "grad_norm": 3.6452057194340504, | |
| "learning_rate": 7.284131037727618e-05, | |
| "loss": 2.1921, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 4.408668730650155, | |
| "grad_norm": 4.168442986791581, | |
| "learning_rate": 7.276671528289299e-05, | |
| "loss": 2.2092, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 4.414860681114551, | |
| "grad_norm": 4.773183982640468, | |
| "learning_rate": 7.269205620606259e-05, | |
| "loss": 2.1974, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 4.421052631578947, | |
| "grad_norm": 4.617784023066425, | |
| "learning_rate": 7.261733335660317e-05, | |
| "loss": 2.1342, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 4.427244582043343, | |
| "grad_norm": 5.1080835688057125, | |
| "learning_rate": 7.25425469445121e-05, | |
| "loss": 2.0862, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 4.43343653250774, | |
| "grad_norm": 4.832245298522228, | |
| "learning_rate": 7.246769717996539e-05, | |
| "loss": 2.141, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 4.439628482972136, | |
| "grad_norm": 4.680422668639977, | |
| "learning_rate": 7.239278427331717e-05, | |
| "loss": 2.1117, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 4.445820433436532, | |
| "grad_norm": 4.5217290454953165, | |
| "learning_rate": 7.231780843509889e-05, | |
| "loss": 2.1004, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 4.452012383900929, | |
| "grad_norm": 4.450258321106715, | |
| "learning_rate": 7.224276987601895e-05, | |
| "loss": 2.2117, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 4.458204334365325, | |
| "grad_norm": 4.203679275635295, | |
| "learning_rate": 7.216766880696199e-05, | |
| "loss": 2.1465, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 4.464396284829721, | |
| "grad_norm": 4.355702552457598, | |
| "learning_rate": 7.209250543898834e-05, | |
| "loss": 2.077, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 4.470588235294118, | |
| "grad_norm": 4.1943319549819655, | |
| "learning_rate": 7.201727998333336e-05, | |
| "loss": 2.1524, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 4.476780185758514, | |
| "grad_norm": 4.613968723058964, | |
| "learning_rate": 7.1941992651407e-05, | |
| "loss": 2.1386, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 4.4829721362229105, | |
| "grad_norm": 4.567908592761703, | |
| "learning_rate": 7.1866643654793e-05, | |
| "loss": 2.1904, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 4.489164086687307, | |
| "grad_norm": 5.156072091242819, | |
| "learning_rate": 7.179123320524848e-05, | |
| "loss": 2.0619, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 4.495356037151703, | |
| "grad_norm": 4.8433809937701655, | |
| "learning_rate": 7.171576151470318e-05, | |
| "loss": 2.0997, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 4.5015479876160995, | |
| "grad_norm": 5.27251370417393, | |
| "learning_rate": 7.164022879525902e-05, | |
| "loss": 2.0835, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 4.507739938080495, | |
| "grad_norm": 4.535212409355026, | |
| "learning_rate": 7.156463525918942e-05, | |
| "loss": 2.2414, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 4.513931888544891, | |
| "grad_norm": 4.961853408192757, | |
| "learning_rate": 7.148898111893867e-05, | |
| "loss": 2.2253, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 4.520123839009288, | |
| "grad_norm": 4.139010929875864, | |
| "learning_rate": 7.141326658712143e-05, | |
| "loss": 2.1412, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 4.526315789473684, | |
| "grad_norm": 4.878561442903061, | |
| "learning_rate": 7.133749187652208e-05, | |
| "loss": 2.2329, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 4.53250773993808, | |
| "grad_norm": 4.446855558161751, | |
| "learning_rate": 7.126165720009406e-05, | |
| "loss": 2.2213, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 4.538699690402477, | |
| "grad_norm": 4.824724533257683, | |
| "learning_rate": 7.118576277095944e-05, | |
| "loss": 2.1342, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 4.544891640866873, | |
| "grad_norm": 4.113461661901802, | |
| "learning_rate": 7.110980880240814e-05, | |
| "loss": 2.0658, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 4.5510835913312695, | |
| "grad_norm": 4.581936636361705, | |
| "learning_rate": 7.10337955078974e-05, | |
| "loss": 2.197, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 4.557275541795666, | |
| "grad_norm": 4.943239352273858, | |
| "learning_rate": 7.095772310105124e-05, | |
| "loss": 2.2231, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 4.563467492260062, | |
| "grad_norm": 4.870911601918529, | |
| "learning_rate": 7.088159179565977e-05, | |
| "loss": 2.2161, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 4.569659442724459, | |
| "grad_norm": 4.6578229886289195, | |
| "learning_rate": 7.080540180567862e-05, | |
| "loss": 2.1472, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 4.575851393188854, | |
| "grad_norm": 4.55833159189419, | |
| "learning_rate": 7.07291533452284e-05, | |
| "loss": 2.1873, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 4.58204334365325, | |
| "grad_norm": 4.7889800503626265, | |
| "learning_rate": 7.065284662859395e-05, | |
| "loss": 2.1148, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 4.588235294117647, | |
| "grad_norm": 4.511730121280619, | |
| "learning_rate": 7.05764818702239e-05, | |
| "loss": 2.2623, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 4.594427244582043, | |
| "grad_norm": 4.343408072498585, | |
| "learning_rate": 7.050005928473e-05, | |
| "loss": 2.1682, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 4.6006191950464395, | |
| "grad_norm": 4.44509304970608, | |
| "learning_rate": 7.042357908688646e-05, | |
| "loss": 2.134, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 4.606811145510836, | |
| "grad_norm": 4.581179901763817, | |
| "learning_rate": 7.034704149162944e-05, | |
| "loss": 2.1382, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 4.613003095975232, | |
| "grad_norm": 3.9424465575413827, | |
| "learning_rate": 7.027044671405643e-05, | |
| "loss": 2.2045, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 4.6191950464396285, | |
| "grad_norm": 4.348375839574913, | |
| "learning_rate": 7.019379496942556e-05, | |
| "loss": 2.1715, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 4.625386996904025, | |
| "grad_norm": 4.1548331420612215, | |
| "learning_rate": 7.011708647315509e-05, | |
| "loss": 2.2015, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 4.631578947368421, | |
| "grad_norm": 4.6915592822878045, | |
| "learning_rate": 7.004032144082281e-05, | |
| "loss": 2.2613, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 4.637770897832818, | |
| "grad_norm": 4.630737026765936, | |
| "learning_rate": 6.996350008816532e-05, | |
| "loss": 2.291, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 4.643962848297214, | |
| "grad_norm": 4.378714794510802, | |
| "learning_rate": 6.988662263107754e-05, | |
| "loss": 2.0821, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 4.65015479876161, | |
| "grad_norm": 5.107819820070701, | |
| "learning_rate": 6.980968928561209e-05, | |
| "loss": 2.174, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 4.656346749226007, | |
| "grad_norm": 4.45323535034182, | |
| "learning_rate": 6.97327002679786e-05, | |
| "loss": 2.1216, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 4.662538699690402, | |
| "grad_norm": 4.618964933752295, | |
| "learning_rate": 6.965565579454322e-05, | |
| "loss": 2.1177, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 4.6687306501547985, | |
| "grad_norm": 4.00692278763202, | |
| "learning_rate": 6.957855608182787e-05, | |
| "loss": 2.0692, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 4.674922600619195, | |
| "grad_norm": 4.568606275897397, | |
| "learning_rate": 6.950140134650979e-05, | |
| "loss": 2.1255, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 4.681114551083591, | |
| "grad_norm": 4.7575605136273005, | |
| "learning_rate": 6.942419180542081e-05, | |
| "loss": 2.1642, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 4.687306501547988, | |
| "grad_norm": 4.31453860474588, | |
| "learning_rate": 6.93469276755468e-05, | |
| "loss": 2.2458, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 4.693498452012384, | |
| "grad_norm": 4.929465346006104, | |
| "learning_rate": 6.926960917402701e-05, | |
| "loss": 2.2451, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 4.69969040247678, | |
| "grad_norm": 4.542828192154121, | |
| "learning_rate": 6.919223651815356e-05, | |
| "loss": 2.1707, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 4.705882352941177, | |
| "grad_norm": 4.087682222344042, | |
| "learning_rate": 6.911480992537071e-05, | |
| "loss": 2.2478, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 4.712074303405573, | |
| "grad_norm": 4.253186811157624, | |
| "learning_rate": 6.903732961327432e-05, | |
| "loss": 2.2018, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 4.718266253869969, | |
| "grad_norm": 4.2362550214015595, | |
| "learning_rate": 6.895979579961119e-05, | |
| "loss": 2.2328, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 4.724458204334366, | |
| "grad_norm": 5.196159466568116, | |
| "learning_rate": 6.888220870227853e-05, | |
| "loss": 2.1508, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 4.730650154798761, | |
| "grad_norm": 3.8878861676649175, | |
| "learning_rate": 6.880456853932326e-05, | |
| "loss": 2.1151, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 4.7368421052631575, | |
| "grad_norm": 4.329023340754469, | |
| "learning_rate": 6.872687552894145e-05, | |
| "loss": 2.2116, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 4.743034055727554, | |
| "grad_norm": 4.335216771787078, | |
| "learning_rate": 6.864912988947767e-05, | |
| "loss": 2.1063, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 4.74922600619195, | |
| "grad_norm": 4.280907354310136, | |
| "learning_rate": 6.857133183942442e-05, | |
| "loss": 2.1185, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 4.755417956656347, | |
| "grad_norm": 4.621438296367508, | |
| "learning_rate": 6.849348159742146e-05, | |
| "loss": 2.0787, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 4.761609907120743, | |
| "grad_norm": 4.836976318228352, | |
| "learning_rate": 6.841557938225527e-05, | |
| "loss": 1.9922, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 4.767801857585139, | |
| "grad_norm": 4.89566581269698, | |
| "learning_rate": 6.833762541285836e-05, | |
| "loss": 2.1255, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 4.773993808049536, | |
| "grad_norm": 4.587551174655151, | |
| "learning_rate": 6.82596199083087e-05, | |
| "loss": 2.0862, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 4.780185758513932, | |
| "grad_norm": 4.098948304566244, | |
| "learning_rate": 6.818156308782911e-05, | |
| "loss": 2.1175, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 4.786377708978328, | |
| "grad_norm": 4.627078810465172, | |
| "learning_rate": 6.810345517078657e-05, | |
| "loss": 1.9263, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 4.792569659442725, | |
| "grad_norm": 4.446918518406074, | |
| "learning_rate": 6.80252963766917e-05, | |
| "loss": 2.154, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 4.798761609907121, | |
| "grad_norm": 4.250857173022367, | |
| "learning_rate": 6.794708692519815e-05, | |
| "loss": 2.1464, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 4.804953560371517, | |
| "grad_norm": 4.0136218324719595, | |
| "learning_rate": 6.786882703610182e-05, | |
| "loss": 2.0561, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 4.811145510835914, | |
| "grad_norm": 4.202736014858293, | |
| "learning_rate": 6.779051692934042e-05, | |
| "loss": 2.1862, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 4.817337461300309, | |
| "grad_norm": 4.282532848878059, | |
| "learning_rate": 6.771215682499284e-05, | |
| "loss": 2.1776, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 4.823529411764706, | |
| "grad_norm": 4.515107712023646, | |
| "learning_rate": 6.76337469432784e-05, | |
| "loss": 2.1185, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 4.829721362229102, | |
| "grad_norm": 3.9819214384755806, | |
| "learning_rate": 6.755528750455634e-05, | |
| "loss": 2.2076, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 4.835913312693498, | |
| "grad_norm": 4.6248577553741566, | |
| "learning_rate": 6.747677872932518e-05, | |
| "loss": 2.1507, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 4.842105263157895, | |
| "grad_norm": 4.768420241307226, | |
| "learning_rate": 6.739822083822208e-05, | |
| "loss": 2.2218, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 4.848297213622291, | |
| "grad_norm": 4.486527686954089, | |
| "learning_rate": 6.731961405202224e-05, | |
| "loss": 2.1709, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 4.854489164086687, | |
| "grad_norm": 4.429697591756185, | |
| "learning_rate": 6.724095859163829e-05, | |
| "loss": 2.1713, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 4.860681114551084, | |
| "grad_norm": 4.389569136042767, | |
| "learning_rate": 6.716225467811961e-05, | |
| "loss": 2.0795, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 4.86687306501548, | |
| "grad_norm": 4.715430340153762, | |
| "learning_rate": 6.70835025326518e-05, | |
| "loss": 2.1989, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 4.8730650154798765, | |
| "grad_norm": 4.7231462421730095, | |
| "learning_rate": 6.700470237655596e-05, | |
| "loss": 2.1325, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 4.879256965944273, | |
| "grad_norm": 4.545669489130173, | |
| "learning_rate": 6.692585443128813e-05, | |
| "loss": 2.0339, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 4.885448916408668, | |
| "grad_norm": 4.756837089506344, | |
| "learning_rate": 6.68469589184387e-05, | |
| "loss": 1.984, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 4.891640866873065, | |
| "grad_norm": 4.850076718618227, | |
| "learning_rate": 6.676801605973169e-05, | |
| "loss": 2.0877, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 4.897832817337461, | |
| "grad_norm": 4.587077738976434, | |
| "learning_rate": 6.668902607702419e-05, | |
| "loss": 2.1371, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 4.904024767801857, | |
| "grad_norm": 4.588091204300823, | |
| "learning_rate": 6.660998919230572e-05, | |
| "loss": 2.1427, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 4.910216718266254, | |
| "grad_norm": 4.912079545017205, | |
| "learning_rate": 6.653090562769763e-05, | |
| "loss": 2.1679, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 4.91640866873065, | |
| "grad_norm": 3.9804465702676044, | |
| "learning_rate": 6.645177560545245e-05, | |
| "loss": 2.2245, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 4.922600619195046, | |
| "grad_norm": 4.144902689757148, | |
| "learning_rate": 6.637259934795327e-05, | |
| "loss": 2.0949, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 4.928792569659443, | |
| "grad_norm": 4.456739380615653, | |
| "learning_rate": 6.62933770777131e-05, | |
| "loss": 2.1302, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 4.934984520123839, | |
| "grad_norm": 4.415767596457962, | |
| "learning_rate": 6.62141090173743e-05, | |
| "loss": 2.0592, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 4.9411764705882355, | |
| "grad_norm": 4.359927642233302, | |
| "learning_rate": 6.613479538970789e-05, | |
| "loss": 2.0891, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 4.947368421052632, | |
| "grad_norm": 4.204704189524659, | |
| "learning_rate": 6.605543641761292e-05, | |
| "loss": 2.1229, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 4.953560371517028, | |
| "grad_norm": 4.30895071405075, | |
| "learning_rate": 6.597603232411597e-05, | |
| "loss": 2.2005, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 4.959752321981425, | |
| "grad_norm": 3.9577540550312396, | |
| "learning_rate": 6.589658333237032e-05, | |
| "loss": 2.2028, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 4.965944272445821, | |
| "grad_norm": 4.49768446217552, | |
| "learning_rate": 6.581708966565546e-05, | |
| "loss": 2.1994, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 4.972136222910216, | |
| "grad_norm": 3.9205013178737276, | |
| "learning_rate": 6.573755154737651e-05, | |
| "loss": 2.1986, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 4.978328173374613, | |
| "grad_norm": 3.965357202346724, | |
| "learning_rate": 6.56579692010634e-05, | |
| "loss": 2.0587, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 4.984520123839009, | |
| "grad_norm": 4.5179420881623695, | |
| "learning_rate": 6.557834285037041e-05, | |
| "loss": 2.1724, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 4.9907120743034055, | |
| "grad_norm": 4.482924767634662, | |
| "learning_rate": 6.549867271907553e-05, | |
| "loss": 2.2078, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 4.996904024767802, | |
| "grad_norm": 4.6896685019691, | |
| "learning_rate": 6.541895903107969e-05, | |
| "loss": 1.9702, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 4.6896685019691, | |
| "learning_rate": 6.533920201040632e-05, | |
| "loss": 1.0959, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 5.006191950464396, | |
| "grad_norm": 5.097135707519353, | |
| "learning_rate": 6.525940188120059e-05, | |
| "loss": 1.8363, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 5.012383900928793, | |
| "grad_norm": 4.297658356502665, | |
| "learning_rate": 6.51795588677288e-05, | |
| "loss": 1.7269, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 5.018575851393189, | |
| "grad_norm": 4.170938719999973, | |
| "learning_rate": 6.509967319437781e-05, | |
| "loss": 1.6751, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 5.024767801857585, | |
| "grad_norm": 5.0993641270081715, | |
| "learning_rate": 6.501974508565437e-05, | |
| "loss": 1.7688, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 5.030959752321982, | |
| "grad_norm": 6.094023609301853, | |
| "learning_rate": 6.493977476618445e-05, | |
| "loss": 1.7442, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 5.037151702786378, | |
| "grad_norm": 7.029385168513187, | |
| "learning_rate": 6.485976246071269e-05, | |
| "loss": 1.7269, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 5.043343653250774, | |
| "grad_norm": 6.469279878438362, | |
| "learning_rate": 6.477970839410166e-05, | |
| "loss": 1.7327, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 5.04953560371517, | |
| "grad_norm": 7.090204265091153, | |
| "learning_rate": 6.469961279133138e-05, | |
| "loss": 1.6868, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 5.055727554179566, | |
| "grad_norm": 6.448327559102867, | |
| "learning_rate": 6.461947587749855e-05, | |
| "loss": 1.8091, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 5.061919504643963, | |
| "grad_norm": 6.285497602063599, | |
| "learning_rate": 6.453929787781595e-05, | |
| "loss": 1.6628, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 5.068111455108359, | |
| "grad_norm": 5.816454250548965, | |
| "learning_rate": 6.445907901761189e-05, | |
| "loss": 1.6682, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 5.074303405572755, | |
| "grad_norm": 6.3789917153433855, | |
| "learning_rate": 6.437881952232947e-05, | |
| "loss": 1.6749, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 5.080495356037152, | |
| "grad_norm": 6.259435870785406, | |
| "learning_rate": 6.429851961752597e-05, | |
| "loss": 1.7692, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 5.086687306501548, | |
| "grad_norm": 7.073683972141054, | |
| "learning_rate": 6.421817952887228e-05, | |
| "loss": 1.5728, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 5.0928792569659445, | |
| "grad_norm": 6.3192650941579975, | |
| "learning_rate": 6.413779948215218e-05, | |
| "loss": 1.6299, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 5.099071207430341, | |
| "grad_norm": 6.069050786842301, | |
| "learning_rate": 6.405737970326179e-05, | |
| "loss": 1.6522, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 5.105263157894737, | |
| "grad_norm": 5.602073764709582, | |
| "learning_rate": 6.397692041820885e-05, | |
| "loss": 1.6706, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 5.1114551083591335, | |
| "grad_norm": 5.638325928752947, | |
| "learning_rate": 6.389642185311215e-05, | |
| "loss": 1.7242, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 5.117647058823529, | |
| "grad_norm": 6.088790913801255, | |
| "learning_rate": 6.381588423420085e-05, | |
| "loss": 1.7472, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 5.123839009287925, | |
| "grad_norm": 5.959373545378721, | |
| "learning_rate": 6.373530778781391e-05, | |
| "loss": 1.7313, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 5.130030959752322, | |
| "grad_norm": 6.005814498691885, | |
| "learning_rate": 6.365469274039936e-05, | |
| "loss": 1.7649, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 5.136222910216718, | |
| "grad_norm": 6.080930391730417, | |
| "learning_rate": 6.357403931851371e-05, | |
| "loss": 1.6265, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 5.142414860681114, | |
| "grad_norm": 6.5286056711387355, | |
| "learning_rate": 6.349334774882137e-05, | |
| "loss": 1.7628, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 5.148606811145511, | |
| "grad_norm": 6.257759792582404, | |
| "learning_rate": 6.341261825809389e-05, | |
| "loss": 1.8872, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 5.154798761609907, | |
| "grad_norm": 6.206382485711229, | |
| "learning_rate": 6.333185107320945e-05, | |
| "loss": 1.7887, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 5.1609907120743035, | |
| "grad_norm": 6.443322713367943, | |
| "learning_rate": 6.325104642115214e-05, | |
| "loss": 1.8509, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 5.1671826625387, | |
| "grad_norm": 6.384300347287064, | |
| "learning_rate": 6.317020452901133e-05, | |
| "loss": 1.8043, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 5.173374613003096, | |
| "grad_norm": 5.890101319337755, | |
| "learning_rate": 6.308932562398109e-05, | |
| "loss": 1.8201, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 5.179566563467493, | |
| "grad_norm": 5.676545837020276, | |
| "learning_rate": 6.300840993335945e-05, | |
| "loss": 1.6744, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 5.185758513931889, | |
| "grad_norm": 5.442639908625545, | |
| "learning_rate": 6.292745768454787e-05, | |
| "loss": 1.6609, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 5.191950464396285, | |
| "grad_norm": 5.735651569209558, | |
| "learning_rate": 6.284646910505054e-05, | |
| "loss": 1.6535, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 5.198142414860681, | |
| "grad_norm": 5.301848332792911, | |
| "learning_rate": 6.276544442247373e-05, | |
| "loss": 1.7173, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 5.204334365325077, | |
| "grad_norm": 5.838168523934679, | |
| "learning_rate": 6.26843838645252e-05, | |
| "loss": 1.7795, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 5.2105263157894735, | |
| "grad_norm": 6.486729476474496, | |
| "learning_rate": 6.260328765901352e-05, | |
| "loss": 1.6847, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 5.21671826625387, | |
| "grad_norm": 6.080973168348242, | |
| "learning_rate": 6.252215603384743e-05, | |
| "loss": 1.8322, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 5.222910216718266, | |
| "grad_norm": 5.553751162760881, | |
| "learning_rate": 6.244098921703524e-05, | |
| "loss": 1.6801, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 5.2291021671826625, | |
| "grad_norm": 7.7187082516070475, | |
| "learning_rate": 6.235978743668415e-05, | |
| "loss": 1.7557, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 5.235294117647059, | |
| "grad_norm": 5.494934240433607, | |
| "learning_rate": 6.227855092099959e-05, | |
| "loss": 1.7204, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 5.241486068111455, | |
| "grad_norm": 5.400078857427008, | |
| "learning_rate": 6.219727989828466e-05, | |
| "loss": 1.8781, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 5.247678018575852, | |
| "grad_norm": 6.282165666257248, | |
| "learning_rate": 6.211597459693939e-05, | |
| "loss": 1.8185, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 5.253869969040248, | |
| "grad_norm": 6.206071203959746, | |
| "learning_rate": 6.203463524546017e-05, | |
| "loss": 1.8299, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 5.260061919504644, | |
| "grad_norm": 6.929875164782504, | |
| "learning_rate": 6.19532620724391e-05, | |
| "loss": 1.8488, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 5.266253869969041, | |
| "grad_norm": 6.94778058551181, | |
| "learning_rate": 6.187185530656328e-05, | |
| "loss": 1.6549, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 5.272445820433436, | |
| "grad_norm": 6.168749749728858, | |
| "learning_rate": 6.179041517661424e-05, | |
| "loss": 1.8255, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 5.2786377708978325, | |
| "grad_norm": 6.388791515764353, | |
| "learning_rate": 6.170894191146733e-05, | |
| "loss": 1.698, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 5.284829721362229, | |
| "grad_norm": 5.749811058169002, | |
| "learning_rate": 6.162743574009094e-05, | |
| "loss": 1.7379, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 5.291021671826625, | |
| "grad_norm": 5.833626724768377, | |
| "learning_rate": 6.154589689154594e-05, | |
| "loss": 1.7003, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 5.2972136222910216, | |
| "grad_norm": 5.5678551392379845, | |
| "learning_rate": 6.146432559498513e-05, | |
| "loss": 1.8987, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 5.303405572755418, | |
| "grad_norm": 5.858138841566609, | |
| "learning_rate": 6.138272207965238e-05, | |
| "loss": 1.8512, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 5.309597523219814, | |
| "grad_norm": 5.673537543847247, | |
| "learning_rate": 6.130108657488219e-05, | |
| "loss": 1.7378, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 5.315789473684211, | |
| "grad_norm": 6.096191086369572, | |
| "learning_rate": 6.121941931009894e-05, | |
| "loss": 1.7862, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 5.321981424148607, | |
| "grad_norm": 6.86038785401145, | |
| "learning_rate": 6.113772051481622e-05, | |
| "loss": 1.6807, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 5.328173374613003, | |
| "grad_norm": 5.864326184212158, | |
| "learning_rate": 6.105599041863631e-05, | |
| "loss": 1.7716, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 5.3343653250774, | |
| "grad_norm": 6.269969066027309, | |
| "learning_rate": 6.09742292512494e-05, | |
| "loss": 1.8457, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 5.340557275541796, | |
| "grad_norm": 6.933266717469394, | |
| "learning_rate": 6.0892437242433035e-05, | |
| "loss": 1.6483, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 5.346749226006192, | |
| "grad_norm": 6.0658603077371405, | |
| "learning_rate": 6.0810614622051396e-05, | |
| "loss": 1.6833, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 5.352941176470588, | |
| "grad_norm": 6.2928527950892805, | |
| "learning_rate": 6.072876162005474e-05, | |
| "loss": 1.7531, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 5.359133126934984, | |
| "grad_norm": 5.7918387029923215, | |
| "learning_rate": 6.064687846647864e-05, | |
| "loss": 1.7937, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 5.365325077399381, | |
| "grad_norm": 6.581852936909009, | |
| "learning_rate": 6.056496539144351e-05, | |
| "loss": 1.6215, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 5.371517027863777, | |
| "grad_norm": 5.973762507331246, | |
| "learning_rate": 6.0483022625153755e-05, | |
| "loss": 1.856, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 5.377708978328173, | |
| "grad_norm": 5.568545853686291, | |
| "learning_rate": 6.040105039789726e-05, | |
| "loss": 1.8014, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 5.38390092879257, | |
| "grad_norm": 5.262578381419057, | |
| "learning_rate": 6.031904894004471e-05, | |
| "loss": 1.729, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 5.390092879256966, | |
| "grad_norm": 6.009994797740327, | |
| "learning_rate": 6.023701848204893e-05, | |
| "loss": 1.7577, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 5.396284829721362, | |
| "grad_norm": 6.362254949364708, | |
| "learning_rate": 6.015495925444429e-05, | |
| "loss": 1.7988, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 5.402476780185759, | |
| "grad_norm": 5.5039823944568775, | |
| "learning_rate": 6.007287148784591e-05, | |
| "loss": 1.8077, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 5.408668730650155, | |
| "grad_norm": 5.968607801035917, | |
| "learning_rate": 5.999075541294921e-05, | |
| "loss": 1.8124, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 5.414860681114551, | |
| "grad_norm": 6.214379080156402, | |
| "learning_rate": 5.9908611260529135e-05, | |
| "loss": 1.8006, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 5.421052631578947, | |
| "grad_norm": 6.204207450749872, | |
| "learning_rate": 5.982643926143954e-05, | |
| "loss": 1.7918, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 5.427244582043343, | |
| "grad_norm": 5.6408213185164895, | |
| "learning_rate": 5.974423964661249e-05, | |
| "loss": 1.8952, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 5.43343653250774, | |
| "grad_norm": 6.347907311283864, | |
| "learning_rate": 5.966201264705778e-05, | |
| "loss": 1.749, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 5.439628482972136, | |
| "grad_norm": 5.809574613917347, | |
| "learning_rate": 5.957975849386202e-05, | |
| "loss": 1.8773, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 5.445820433436532, | |
| "grad_norm": 5.632385846674114, | |
| "learning_rate": 5.949747741818824e-05, | |
| "loss": 1.7677, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 5.452012383900929, | |
| "grad_norm": 5.440696939412205, | |
| "learning_rate": 5.941516965127509e-05, | |
| "loss": 1.8766, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 5.458204334365325, | |
| "grad_norm": 6.022036249548154, | |
| "learning_rate": 5.933283542443622e-05, | |
| "loss": 1.7556, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 5.464396284829721, | |
| "grad_norm": 5.189714160763887, | |
| "learning_rate": 5.925047496905968e-05, | |
| "loss": 1.7301, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 5.470588235294118, | |
| "grad_norm": 6.485624186917087, | |
| "learning_rate": 5.916808851660718e-05, | |
| "loss": 1.9348, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 5.476780185758514, | |
| "grad_norm": 6.217627206801023, | |
| "learning_rate": 5.9085676298613534e-05, | |
| "loss": 1.7815, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 5.4829721362229105, | |
| "grad_norm": 6.97023254823353, | |
| "learning_rate": 5.900323854668597e-05, | |
| "loss": 1.8831, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 5.489164086687307, | |
| "grad_norm": 5.216736331651397, | |
| "learning_rate": 5.892077549250341e-05, | |
| "loss": 1.7848, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 5.495356037151703, | |
| "grad_norm": 5.512287508329415, | |
| "learning_rate": 5.8838287367815966e-05, | |
| "loss": 1.8791, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 5.5015479876160995, | |
| "grad_norm": 5.711267110515132, | |
| "learning_rate": 5.875577440444418e-05, | |
| "loss": 1.7908, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 5.507739938080495, | |
| "grad_norm": 5.433396790662708, | |
| "learning_rate": 5.867323683427836e-05, | |
| "loss": 1.8233, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 5.513931888544891, | |
| "grad_norm": 6.448127794945795, | |
| "learning_rate": 5.8590674889277994e-05, | |
| "loss": 1.7747, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 5.520123839009288, | |
| "grad_norm": 5.7086178958780565, | |
| "learning_rate": 5.85080888014711e-05, | |
| "loss": 1.8987, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 5.526315789473684, | |
| "grad_norm": 5.583443142146346, | |
| "learning_rate": 5.842547880295353e-05, | |
| "loss": 1.8491, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 5.53250773993808, | |
| "grad_norm": 6.079781416829725, | |
| "learning_rate": 5.834284512588831e-05, | |
| "loss": 1.8515, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 5.538699690402477, | |
| "grad_norm": 5.6372864199748385, | |
| "learning_rate": 5.826018800250503e-05, | |
| "loss": 1.8367, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 5.544891640866873, | |
| "grad_norm": 5.515529122384791, | |
| "learning_rate": 5.817750766509915e-05, | |
| "loss": 1.6847, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 5.5510835913312695, | |
| "grad_norm": 5.334874432827247, | |
| "learning_rate": 5.809480434603143e-05, | |
| "loss": 1.7797, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 5.557275541795666, | |
| "grad_norm": 5.80140570708637, | |
| "learning_rate": 5.801207827772714e-05, | |
| "loss": 1.8654, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 5.563467492260062, | |
| "grad_norm": 6.321831941627306, | |
| "learning_rate": 5.7929329692675525e-05, | |
| "loss": 1.7084, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 5.569659442724459, | |
| "grad_norm": 5.412047294377396, | |
| "learning_rate": 5.784655882342912e-05, | |
| "loss": 1.8567, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 5.575851393188854, | |
| "grad_norm": 5.566852785419783, | |
| "learning_rate": 5.776376590260306e-05, | |
| "loss": 1.8087, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 5.58204334365325, | |
| "grad_norm": 5.652096604313271, | |
| "learning_rate": 5.768095116287444e-05, | |
| "loss": 1.7999, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 5.588235294117647, | |
| "grad_norm": 5.975655299586698, | |
| "learning_rate": 5.7598114836981734e-05, | |
| "loss": 1.8325, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 5.594427244582043, | |
| "grad_norm": 5.940380162039415, | |
| "learning_rate": 5.751525715772401e-05, | |
| "loss": 1.8353, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 5.6006191950464395, | |
| "grad_norm": 5.426752630449644, | |
| "learning_rate": 5.7432378357960415e-05, | |
| "loss": 1.7968, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 5.606811145510836, | |
| "grad_norm": 5.3791925890949255, | |
| "learning_rate": 5.734947867060938e-05, | |
| "loss": 1.7644, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 5.613003095975232, | |
| "grad_norm": 5.3770146741879685, | |
| "learning_rate": 5.726655832864809e-05, | |
| "loss": 1.821, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 5.6191950464396285, | |
| "grad_norm": 5.7576425510460405, | |
| "learning_rate": 5.718361756511177e-05, | |
| "loss": 1.8349, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 5.625386996904025, | |
| "grad_norm": 5.545319099253952, | |
| "learning_rate": 5.7100656613093005e-05, | |
| "loss": 1.8334, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 5.631578947368421, | |
| "grad_norm": 5.621097957860418, | |
| "learning_rate": 5.7017675705741156e-05, | |
| "loss": 1.7505, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 5.637770897832818, | |
| "grad_norm": 5.496504675395947, | |
| "learning_rate": 5.693467507626164e-05, | |
| "loss": 1.7661, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 5.643962848297214, | |
| "grad_norm": 5.440416868123972, | |
| "learning_rate": 5.685165495791534e-05, | |
| "loss": 1.761, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 5.65015479876161, | |
| "grad_norm": 5.587396623055035, | |
| "learning_rate": 5.6768615584017804e-05, | |
| "loss": 1.813, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 5.656346749226007, | |
| "grad_norm": 5.6813322643015125, | |
| "learning_rate": 5.6685557187938844e-05, | |
| "loss": 1.9138, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 5.662538699690402, | |
| "grad_norm": 5.724673293483267, | |
| "learning_rate": 5.660248000310162e-05, | |
| "loss": 1.8429, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 5.6687306501547985, | |
| "grad_norm": 5.7928122967608715, | |
| "learning_rate": 5.6519384262982144e-05, | |
| "loss": 1.8223, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 5.674922600619195, | |
| "grad_norm": 5.425499489197008, | |
| "learning_rate": 5.643627020110855e-05, | |
| "loss": 1.8533, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 5.681114551083591, | |
| "grad_norm": 5.36215523508532, | |
| "learning_rate": 5.635313805106047e-05, | |
| "loss": 1.6969, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 5.687306501547988, | |
| "grad_norm": 4.999203099014986, | |
| "learning_rate": 5.626998804646841e-05, | |
| "loss": 1.8429, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 5.693498452012384, | |
| "grad_norm": 5.928680338381235, | |
| "learning_rate": 5.618682042101297e-05, | |
| "loss": 1.8639, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 5.69969040247678, | |
| "grad_norm": 5.697996175032324, | |
| "learning_rate": 5.610363540842435e-05, | |
| "loss": 1.7222, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 5.705882352941177, | |
| "grad_norm": 5.580876924156143, | |
| "learning_rate": 5.602043324248157e-05, | |
| "loss": 1.8367, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 5.712074303405573, | |
| "grad_norm": 5.563961054575789, | |
| "learning_rate": 5.5937214157011884e-05, | |
| "loss": 1.7032, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 5.718266253869969, | |
| "grad_norm": 5.878100256797036, | |
| "learning_rate": 5.585397838589005e-05, | |
| "loss": 1.805, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 5.724458204334366, | |
| "grad_norm": 5.7076843044116075, | |
| "learning_rate": 5.577072616303779e-05, | |
| "loss": 1.8569, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 5.730650154798761, | |
| "grad_norm": 6.05372952918896, | |
| "learning_rate": 5.5687457722423e-05, | |
| "loss": 1.896, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 5.7368421052631575, | |
| "grad_norm": 5.16364163182538, | |
| "learning_rate": 5.5604173298059156e-05, | |
| "loss": 1.6782, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 5.743034055727554, | |
| "grad_norm": 5.6002530820827525, | |
| "learning_rate": 5.55208731240047e-05, | |
| "loss": 1.7387, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 5.74922600619195, | |
| "grad_norm": 5.471173638395719, | |
| "learning_rate": 5.5437557434362305e-05, | |
| "loss": 1.6704, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 5.755417956656347, | |
| "grad_norm": 5.614838368063709, | |
| "learning_rate": 5.535422646327826e-05, | |
| "loss": 1.9572, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 5.761609907120743, | |
| "grad_norm": 6.03455846560755, | |
| "learning_rate": 5.5270880444941764e-05, | |
| "loss": 1.9031, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 5.767801857585139, | |
| "grad_norm": 6.661401789268076, | |
| "learning_rate": 5.518751961358436e-05, | |
| "loss": 1.8565, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 5.773993808049536, | |
| "grad_norm": 5.647161883767869, | |
| "learning_rate": 5.510414420347918e-05, | |
| "loss": 1.8405, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 5.780185758513932, | |
| "grad_norm": 5.334550187571599, | |
| "learning_rate": 5.502075444894035e-05, | |
| "loss": 1.898, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 5.786377708978328, | |
| "grad_norm": 5.0499248707961675, | |
| "learning_rate": 5.493735058432227e-05, | |
| "loss": 1.872, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 5.792569659442725, | |
| "grad_norm": 5.50650425050073, | |
| "learning_rate": 5.485393284401905e-05, | |
| "loss": 1.7519, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 5.798761609907121, | |
| "grad_norm": 5.256311428845345, | |
| "learning_rate": 5.477050146246378e-05, | |
| "loss": 1.703, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 5.804953560371517, | |
| "grad_norm": 5.408181017988561, | |
| "learning_rate": 5.468705667412785e-05, | |
| "loss": 1.8261, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 5.811145510835914, | |
| "grad_norm": 5.484560920918981, | |
| "learning_rate": 5.4603598713520356e-05, | |
| "loss": 1.8205, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 5.817337461300309, | |
| "grad_norm": 5.728999670930413, | |
| "learning_rate": 5.452012781518743e-05, | |
| "loss": 1.6577, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 5.823529411764706, | |
| "grad_norm": 5.502548441271816, | |
| "learning_rate": 5.443664421371153e-05, | |
| "loss": 1.6221, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 5.829721362229102, | |
| "grad_norm": 6.35610081466655, | |
| "learning_rate": 5.435314814371082e-05, | |
| "loss": 1.7112, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 5.835913312693498, | |
| "grad_norm": 5.6172244449375075, | |
| "learning_rate": 5.426963983983853e-05, | |
| "loss": 1.7829, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 5.842105263157895, | |
| "grad_norm": 5.492954166804681, | |
| "learning_rate": 5.4186119536782246e-05, | |
| "loss": 1.7652, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 5.848297213622291, | |
| "grad_norm": 5.7804318465473425, | |
| "learning_rate": 5.410258746926328e-05, | |
| "loss": 1.7701, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 5.854489164086687, | |
| "grad_norm": 6.372706653206452, | |
| "learning_rate": 5.4019043872036015e-05, | |
| "loss": 1.6976, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 5.860681114551084, | |
| "grad_norm": 5.19023867382763, | |
| "learning_rate": 5.393548897988724e-05, | |
| "loss": 1.7425, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 5.86687306501548, | |
| "grad_norm": 5.313027581311771, | |
| "learning_rate": 5.3851923027635475e-05, | |
| "loss": 1.7379, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 5.8730650154798765, | |
| "grad_norm": 5.346043529771426, | |
| "learning_rate": 5.376834625013031e-05, | |
| "loss": 1.7895, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 5.879256965944273, | |
| "grad_norm": 5.6516277074371235, | |
| "learning_rate": 5.3684758882251794e-05, | |
| "loss": 1.6915, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 5.885448916408668, | |
| "grad_norm": 5.60282925527119, | |
| "learning_rate": 5.360116115890972e-05, | |
| "loss": 1.5807, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 5.891640866873065, | |
| "grad_norm": 5.756675233255651, | |
| "learning_rate": 5.351755331504297e-05, | |
| "loss": 1.8783, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 5.897832817337461, | |
| "grad_norm": 6.398985132652635, | |
| "learning_rate": 5.343393558561888e-05, | |
| "loss": 1.8418, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 5.904024767801857, | |
| "grad_norm": 5.714793081519792, | |
| "learning_rate": 5.3350308205632574e-05, | |
| "loss": 1.6848, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 5.910216718266254, | |
| "grad_norm": 5.732024375784504, | |
| "learning_rate": 5.3266671410106306e-05, | |
| "loss": 1.7741, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 5.91640866873065, | |
| "grad_norm": 5.777349638869013, | |
| "learning_rate": 5.318302543408875e-05, | |
| "loss": 1.7644, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 5.922600619195046, | |
| "grad_norm": 5.976598354398558, | |
| "learning_rate": 5.3099370512654426e-05, | |
| "loss": 1.7296, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 5.928792569659443, | |
| "grad_norm": 5.404269095636371, | |
| "learning_rate": 5.3015706880902974e-05, | |
| "loss": 1.6866, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 5.934984520123839, | |
| "grad_norm": 5.527542596369583, | |
| "learning_rate": 5.293203477395851e-05, | |
| "loss": 1.7146, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 5.9411764705882355, | |
| "grad_norm": 5.078043839033948, | |
| "learning_rate": 5.284835442696895e-05, | |
| "loss": 1.8524, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 5.947368421052632, | |
| "grad_norm": 5.772706720501868, | |
| "learning_rate": 5.276466607510544e-05, | |
| "loss": 1.7661, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 5.953560371517028, | |
| "grad_norm": 5.520796373876517, | |
| "learning_rate": 5.2680969953561545e-05, | |
| "loss": 1.882, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 5.959752321981425, | |
| "grad_norm": 5.827251604758201, | |
| "learning_rate": 5.259726629755267e-05, | |
| "loss": 1.8778, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 5.965944272445821, | |
| "grad_norm": 6.207373379919172, | |
| "learning_rate": 5.251355534231546e-05, | |
| "loss": 1.8627, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 5.972136222910216, | |
| "grad_norm": 5.123600713644226, | |
| "learning_rate": 5.2429837323107e-05, | |
| "loss": 1.7472, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 5.978328173374613, | |
| "grad_norm": 5.101450048523518, | |
| "learning_rate": 5.234611247520428e-05, | |
| "loss": 1.7643, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 5.984520123839009, | |
| "grad_norm": 5.408204446605995, | |
| "learning_rate": 5.2262381033903426e-05, | |
| "loss": 1.7875, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 5.9907120743034055, | |
| "grad_norm": 5.379723976429878, | |
| "learning_rate": 5.2178643234519164e-05, | |
| "loss": 1.7445, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 5.996904024767802, | |
| "grad_norm": 5.838975887259199, | |
| "learning_rate": 5.209489931238405e-05, | |
| "loss": 1.7112, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 5.840227146356309, | |
| "learning_rate": 5.201114950284782e-05, | |
| "loss": 0.8978, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 6.006191950464396, | |
| "grad_norm": 4.275597834318695, | |
| "learning_rate": 5.192739404127679e-05, | |
| "loss": 1.3108, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 6.012383900928793, | |
| "grad_norm": 5.030728682297231, | |
| "learning_rate": 5.1843633163053175e-05, | |
| "loss": 1.2031, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 6.018575851393189, | |
| "grad_norm": 5.8141889393279484, | |
| "learning_rate": 5.175986710357439e-05, | |
| "loss": 1.169, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 6.024767801857585, | |
| "grad_norm": 7.558224479046815, | |
| "learning_rate": 5.167609609825238e-05, | |
| "loss": 1.219, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 6.030959752321982, | |
| "grad_norm": 8.484873862857635, | |
| "learning_rate": 5.159232038251305e-05, | |
| "loss": 1.2784, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 6.037151702786378, | |
| "grad_norm": 9.329078391082556, | |
| "learning_rate": 5.1508540191795506e-05, | |
| "loss": 1.2931, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 6.043343653250774, | |
| "grad_norm": 8.234782351921575, | |
| "learning_rate": 5.142475576155146e-05, | |
| "loss": 1.1175, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 6.04953560371517, | |
| "grad_norm": 8.285760286310255, | |
| "learning_rate": 5.1340967327244496e-05, | |
| "loss": 1.2494, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 6.055727554179566, | |
| "grad_norm": 7.602885755223493, | |
| "learning_rate": 5.1257175124349464e-05, | |
| "loss": 1.2171, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 6.061919504643963, | |
| "grad_norm": 7.073271538574044, | |
| "learning_rate": 5.117337938835186e-05, | |
| "loss": 1.1765, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 6.068111455108359, | |
| "grad_norm": 7.315973298938372, | |
| "learning_rate": 5.1089580354747026e-05, | |
| "loss": 1.118, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 6.074303405572755, | |
| "grad_norm": 6.601903535655793, | |
| "learning_rate": 5.100577825903958e-05, | |
| "loss": 1.2171, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 6.080495356037152, | |
| "grad_norm": 6.757876749392141, | |
| "learning_rate": 5.092197333674286e-05, | |
| "loss": 1.2503, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 6.086687306501548, | |
| "grad_norm": 6.9466240275860835, | |
| "learning_rate": 5.0838165823377995e-05, | |
| "loss": 1.2236, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 6.0928792569659445, | |
| "grad_norm": 7.381969086666438, | |
| "learning_rate": 5.0754355954473466e-05, | |
| "loss": 1.2083, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 6.099071207430341, | |
| "grad_norm": 7.145098862853887, | |
| "learning_rate": 5.0670543965564386e-05, | |
| "loss": 1.3174, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 6.105263157894737, | |
| "grad_norm": 7.137360767402197, | |
| "learning_rate": 5.0586730092191835e-05, | |
| "loss": 1.1661, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 6.1114551083591335, | |
| "grad_norm": 7.417766045611863, | |
| "learning_rate": 5.0502914569902116e-05, | |
| "loss": 1.1905, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 6.117647058823529, | |
| "grad_norm": 7.26464568460221, | |
| "learning_rate": 5.041909763424625e-05, | |
| "loss": 1.2316, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 6.123839009287925, | |
| "grad_norm": 7.5052794586230345, | |
| "learning_rate": 5.033527952077917e-05, | |
| "loss": 1.1215, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 6.130030959752322, | |
| "grad_norm": 7.1970182467527355, | |
| "learning_rate": 5.025146046505917e-05, | |
| "loss": 1.2079, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 6.136222910216718, | |
| "grad_norm": 7.749893049143311, | |
| "learning_rate": 5.0167640702647155e-05, | |
| "loss": 1.3064, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 6.142414860681114, | |
| "grad_norm": 7.551889488671015, | |
| "learning_rate": 5.0083820469106014e-05, | |
| "loss": 1.2398, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 6.148606811145511, | |
| "grad_norm": 7.588653228547087, | |
| "learning_rate": 5e-05, | |
| "loss": 1.2056, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 6.154798761609907, | |
| "grad_norm": 7.447548089817992, | |
| "learning_rate": 4.991617953089399e-05, | |
| "loss": 1.2482, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 6.1609907120743035, | |
| "grad_norm": 7.159080033768566, | |
| "learning_rate": 4.9832359297352856e-05, | |
| "loss": 1.2093, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 6.1671826625387, | |
| "grad_norm": 7.456941150360937, | |
| "learning_rate": 4.9748539534940825e-05, | |
| "loss": 1.2034, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 6.173374613003096, | |
| "grad_norm": 8.179625978947795, | |
| "learning_rate": 4.966472047922083e-05, | |
| "loss": 1.2213, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 6.179566563467493, | |
| "grad_norm": 7.691969033733335, | |
| "learning_rate": 4.958090236575377e-05, | |
| "loss": 1.2253, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 6.185758513931889, | |
| "grad_norm": 7.2575621885339645, | |
| "learning_rate": 4.9497085430097896e-05, | |
| "loss": 1.3645, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 6.191950464396285, | |
| "grad_norm": 6.507233877034855, | |
| "learning_rate": 4.941326990780819e-05, | |
| "loss": 1.1571, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 6.198142414860681, | |
| "grad_norm": 6.651433405699857, | |
| "learning_rate": 4.932945603443563e-05, | |
| "loss": 1.1652, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 6.204334365325077, | |
| "grad_norm": 6.650448093340969, | |
| "learning_rate": 4.9245644045526546e-05, | |
| "loss": 1.2783, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 6.2105263157894735, | |
| "grad_norm": 7.781500509708706, | |
| "learning_rate": 4.916183417662202e-05, | |
| "loss": 1.3173, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 6.21671826625387, | |
| "grad_norm": 7.517957799121537, | |
| "learning_rate": 4.907802666325716e-05, | |
| "loss": 1.2165, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 6.222910216718266, | |
| "grad_norm": 7.157971116077995, | |
| "learning_rate": 4.8994221740960424e-05, | |
| "loss": 1.2808, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 6.2291021671826625, | |
| "grad_norm": 6.816498614027582, | |
| "learning_rate": 4.8910419645253e-05, | |
| "loss": 1.2127, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 6.235294117647059, | |
| "grad_norm": 6.787835631470309, | |
| "learning_rate": 4.882662061164814e-05, | |
| "loss": 1.2102, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 6.241486068111455, | |
| "grad_norm": 7.371585623447389, | |
| "learning_rate": 4.874282487565053e-05, | |
| "loss": 1.1692, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 6.247678018575852, | |
| "grad_norm": 7.24692426387527, | |
| "learning_rate": 4.8659032672755516e-05, | |
| "loss": 1.1388, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 6.253869969040248, | |
| "grad_norm": 7.6875630481514206, | |
| "learning_rate": 4.8575244238448546e-05, | |
| "loss": 1.2984, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 6.260061919504644, | |
| "grad_norm": 8.185112676453237, | |
| "learning_rate": 4.8491459808204506e-05, | |
| "loss": 1.1884, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 6.266253869969041, | |
| "grad_norm": 7.504289154797226, | |
| "learning_rate": 4.8407679617486974e-05, | |
| "loss": 1.1316, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 6.272445820433436, | |
| "grad_norm": 7.442593673019155, | |
| "learning_rate": 4.832390390174763e-05, | |
| "loss": 1.2232, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 6.2786377708978325, | |
| "grad_norm": 7.614862457360967, | |
| "learning_rate": 4.824013289642563e-05, | |
| "loss": 1.31, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 6.284829721362229, | |
| "grad_norm": 7.024726600452601, | |
| "learning_rate": 4.815636683694683e-05, | |
| "loss": 1.2616, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 6.291021671826625, | |
| "grad_norm": 7.377696892059301, | |
| "learning_rate": 4.807260595872322e-05, | |
| "loss": 1.24, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 6.2972136222910216, | |
| "grad_norm": 8.404120290677987, | |
| "learning_rate": 4.79888504971522e-05, | |
| "loss": 1.2933, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 6.303405572755418, | |
| "grad_norm": 6.962131331816545, | |
| "learning_rate": 4.7905100687615956e-05, | |
| "loss": 1.2178, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 6.309597523219814, | |
| "grad_norm": 7.4231879389884, | |
| "learning_rate": 4.7821356765480834e-05, | |
| "loss": 1.365, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 6.315789473684211, | |
| "grad_norm": 7.668830962212028, | |
| "learning_rate": 4.773761896609658e-05, | |
| "loss": 1.1617, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 6.321981424148607, | |
| "grad_norm": 8.124924759999253, | |
| "learning_rate": 4.7653887524795735e-05, | |
| "loss": 1.288, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 6.328173374613003, | |
| "grad_norm": 6.8212071234113365, | |
| "learning_rate": 4.7570162676893014e-05, | |
| "loss": 1.2787, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 6.3343653250774, | |
| "grad_norm": 7.628456619758793, | |
| "learning_rate": 4.748644465768457e-05, | |
| "loss": 1.2784, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 6.340557275541796, | |
| "grad_norm": 6.808373085825395, | |
| "learning_rate": 4.740273370244734e-05, | |
| "loss": 1.165, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 6.346749226006192, | |
| "grad_norm": 7.305123080991359, | |
| "learning_rate": 4.7319030046438474e-05, | |
| "loss": 1.3038, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 6.352941176470588, | |
| "grad_norm": 7.031017640077646, | |
| "learning_rate": 4.723533392489457e-05, | |
| "loss": 1.2891, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 6.359133126934984, | |
| "grad_norm": 7.946312792286679, | |
| "learning_rate": 4.7151645573031064e-05, | |
| "loss": 1.2275, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 6.365325077399381, | |
| "grad_norm": 7.232948196291571, | |
| "learning_rate": 4.706796522604152e-05, | |
| "loss": 1.2756, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 6.371517027863777, | |
| "grad_norm": 8.169258262407036, | |
| "learning_rate": 4.698429311909705e-05, | |
| "loss": 1.2046, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 6.377708978328173, | |
| "grad_norm": 7.139112933535375, | |
| "learning_rate": 4.690062948734558e-05, | |
| "loss": 1.2697, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 6.38390092879257, | |
| "grad_norm": 7.031067573669555, | |
| "learning_rate": 4.681697456591126e-05, | |
| "loss": 1.3274, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 6.390092879256966, | |
| "grad_norm": 7.388094833181507, | |
| "learning_rate": 4.673332858989371e-05, | |
| "loss": 1.3151, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 6.396284829721362, | |
| "grad_norm": 7.525891020121259, | |
| "learning_rate": 4.664969179436744e-05, | |
| "loss": 1.3229, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 6.402476780185759, | |
| "grad_norm": 7.036939378953139, | |
| "learning_rate": 4.656606441438113e-05, | |
| "loss": 1.3305, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 6.408668730650155, | |
| "grad_norm": 6.516201091133355, | |
| "learning_rate": 4.648244668495704e-05, | |
| "loss": 1.2263, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 6.414860681114551, | |
| "grad_norm": 7.819583293034257, | |
| "learning_rate": 4.6398838841090284e-05, | |
| "loss": 1.3778, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 6.421052631578947, | |
| "grad_norm": 7.405554618748319, | |
| "learning_rate": 4.631524111774822e-05, | |
| "loss": 1.2554, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 6.427244582043343, | |
| "grad_norm": 7.485395073347659, | |
| "learning_rate": 4.623165374986971e-05, | |
| "loss": 1.3306, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 6.43343653250774, | |
| "grad_norm": 7.3281945090055824, | |
| "learning_rate": 4.6148076972364544e-05, | |
| "loss": 1.2903, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 6.439628482972136, | |
| "grad_norm": 6.89408951561583, | |
| "learning_rate": 4.606451102011278e-05, | |
| "loss": 1.218, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 6.445820433436532, | |
| "grad_norm": 7.208808297092486, | |
| "learning_rate": 4.598095612796398e-05, | |
| "loss": 1.2513, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 6.452012383900929, | |
| "grad_norm": 8.362888875092093, | |
| "learning_rate": 4.5897412530736735e-05, | |
| "loss": 1.28, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 6.458204334365325, | |
| "grad_norm": 8.221896268659757, | |
| "learning_rate": 4.5813880463217766e-05, | |
| "loss": 1.3607, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 6.464396284829721, | |
| "grad_norm": 6.598237194007563, | |
| "learning_rate": 4.573036016016149e-05, | |
| "loss": 1.2459, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 6.470588235294118, | |
| "grad_norm": 7.17506795330074, | |
| "learning_rate": 4.564685185628919e-05, | |
| "loss": 1.2524, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 6.476780185758514, | |
| "grad_norm": 7.37054652654002, | |
| "learning_rate": 4.556335578628849e-05, | |
| "loss": 1.2609, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 6.4829721362229105, | |
| "grad_norm": 7.444318533212484, | |
| "learning_rate": 4.5479872184812575e-05, | |
| "loss": 1.1776, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 6.489164086687307, | |
| "grad_norm": 6.7548786489662, | |
| "learning_rate": 4.539640128647965e-05, | |
| "loss": 1.1816, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 6.495356037151703, | |
| "grad_norm": 7.502330023138323, | |
| "learning_rate": 4.531294332587216e-05, | |
| "loss": 1.2786, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 6.5015479876160995, | |
| "grad_norm": 7.6873595870822475, | |
| "learning_rate": 4.5229498537536235e-05, | |
| "loss": 1.1847, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 6.507739938080495, | |
| "grad_norm": 7.08085653318228, | |
| "learning_rate": 4.514606715598096e-05, | |
| "loss": 1.2494, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 6.513931888544891, | |
| "grad_norm": 7.774057962263595, | |
| "learning_rate": 4.506264941567774e-05, | |
| "loss": 1.1855, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 6.520123839009288, | |
| "grad_norm": 7.957261147038559, | |
| "learning_rate": 4.497924555105966e-05, | |
| "loss": 1.2751, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 6.526315789473684, | |
| "grad_norm": 7.307938939131949, | |
| "learning_rate": 4.489585579652083e-05, | |
| "loss": 1.3571, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 6.53250773993808, | |
| "grad_norm": 7.9128153067396605, | |
| "learning_rate": 4.4812480386415655e-05, | |
| "loss": 1.3486, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 6.538699690402477, | |
| "grad_norm": 7.585860667174547, | |
| "learning_rate": 4.472911955505824e-05, | |
| "loss": 1.3405, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 6.544891640866873, | |
| "grad_norm": 7.053910641821553, | |
| "learning_rate": 4.464577353672175e-05, | |
| "loss": 1.2813, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 6.5510835913312695, | |
| "grad_norm": 7.0175847912120535, | |
| "learning_rate": 4.456244256563769e-05, | |
| "loss": 1.2993, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 6.557275541795666, | |
| "grad_norm": 8.261893892570697, | |
| "learning_rate": 4.4479126875995304e-05, | |
| "loss": 1.3186, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 6.563467492260062, | |
| "grad_norm": 7.377863177529412, | |
| "learning_rate": 4.439582670194085e-05, | |
| "loss": 1.2835, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 6.569659442724459, | |
| "grad_norm": 7.988132361082391, | |
| "learning_rate": 4.431254227757703e-05, | |
| "loss": 1.2427, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 6.575851393188854, | |
| "grad_norm": 7.294581625440724, | |
| "learning_rate": 4.422927383696224e-05, | |
| "loss": 1.4212, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 6.58204334365325, | |
| "grad_norm": 7.293717549302451, | |
| "learning_rate": 4.414602161410996e-05, | |
| "loss": 1.3549, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 6.588235294117647, | |
| "grad_norm": 7.058630306102499, | |
| "learning_rate": 4.406278584298813e-05, | |
| "loss": 1.2859, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 6.594427244582043, | |
| "grad_norm": 6.921408254017408, | |
| "learning_rate": 4.397956675751844e-05, | |
| "loss": 1.2809, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 6.6006191950464395, | |
| "grad_norm": 7.558955213024363, | |
| "learning_rate": 4.389636459157567e-05, | |
| "loss": 1.3715, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 6.606811145510836, | |
| "grad_norm": 6.88911221915387, | |
| "learning_rate": 4.381317957898704e-05, | |
| "loss": 1.3168, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 6.613003095975232, | |
| "grad_norm": 7.249344304210554, | |
| "learning_rate": 4.373001195353159e-05, | |
| "loss": 1.3486, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 6.6191950464396285, | |
| "grad_norm": 7.779301524807008, | |
| "learning_rate": 4.364686194893952e-05, | |
| "loss": 1.1833, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 6.625386996904025, | |
| "grad_norm": 6.863098452906575, | |
| "learning_rate": 4.356372979889146e-05, | |
| "loss": 1.3434, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 6.631578947368421, | |
| "grad_norm": 7.629387293816119, | |
| "learning_rate": 4.348061573701786e-05, | |
| "loss": 1.3763, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 6.637770897832818, | |
| "grad_norm": 6.759090261353713, | |
| "learning_rate": 4.339751999689839e-05, | |
| "loss": 1.2835, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 6.643962848297214, | |
| "grad_norm": 6.950291332023515, | |
| "learning_rate": 4.3314442812061174e-05, | |
| "loss": 1.3158, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 6.65015479876161, | |
| "grad_norm": 7.394433820704982, | |
| "learning_rate": 4.323138441598219e-05, | |
| "loss": 1.3166, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 6.656346749226007, | |
| "grad_norm": 7.520288383722824, | |
| "learning_rate": 4.3148345042084674e-05, | |
| "loss": 1.3971, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 6.662538699690402, | |
| "grad_norm": 7.226902674751723, | |
| "learning_rate": 4.306532492373836e-05, | |
| "loss": 1.3676, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 6.6687306501547985, | |
| "grad_norm": 7.914934791249491, | |
| "learning_rate": 4.2982324294258855e-05, | |
| "loss": 1.2324, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 6.674922600619195, | |
| "grad_norm": 7.132623440013545, | |
| "learning_rate": 4.289934338690701e-05, | |
| "loss": 1.3126, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 6.681114551083591, | |
| "grad_norm": 6.94850316262997, | |
| "learning_rate": 4.281638243488823e-05, | |
| "loss": 1.3197, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 6.687306501547988, | |
| "grad_norm": 7.615556368279129, | |
| "learning_rate": 4.273344167135191e-05, | |
| "loss": 1.2855, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 6.693498452012384, | |
| "grad_norm": 8.482830314291196, | |
| "learning_rate": 4.265052132939063e-05, | |
| "loss": 1.2906, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 6.69969040247678, | |
| "grad_norm": 6.951661079605685, | |
| "learning_rate": 4.2567621642039596e-05, | |
| "loss": 1.3183, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 6.705882352941177, | |
| "grad_norm": 7.328744620320157, | |
| "learning_rate": 4.2484742842276e-05, | |
| "loss": 1.2574, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 6.712074303405573, | |
| "grad_norm": 7.433086061619386, | |
| "learning_rate": 4.240188516301829e-05, | |
| "loss": 1.2361, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 6.718266253869969, | |
| "grad_norm": 7.451348957570564, | |
| "learning_rate": 4.2319048837125566e-05, | |
| "loss": 1.4189, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 6.724458204334366, | |
| "grad_norm": 7.36655253165013, | |
| "learning_rate": 4.223623409739695e-05, | |
| "loss": 1.2414, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 6.730650154798761, | |
| "grad_norm": 7.50762675670494, | |
| "learning_rate": 4.215344117657088e-05, | |
| "loss": 1.2911, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 6.7368421052631575, | |
| "grad_norm": 7.2198068102674515, | |
| "learning_rate": 4.207067030732449e-05, | |
| "loss": 1.2591, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 6.743034055727554, | |
| "grad_norm": 7.423469615444281, | |
| "learning_rate": 4.198792172227287e-05, | |
| "loss": 1.2815, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 6.74922600619195, | |
| "grad_norm": 7.8244524756087905, | |
| "learning_rate": 4.1905195653968585e-05, | |
| "loss": 1.2598, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 6.755417956656347, | |
| "grad_norm": 7.455185432784073, | |
| "learning_rate": 4.182249233490084e-05, | |
| "loss": 1.3286, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 6.761609907120743, | |
| "grad_norm": 7.35007999134081, | |
| "learning_rate": 4.173981199749498e-05, | |
| "loss": 1.3021, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 6.767801857585139, | |
| "grad_norm": 6.911478606382307, | |
| "learning_rate": 4.1657154874111695e-05, | |
| "loss": 1.3063, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 6.773993808049536, | |
| "grad_norm": 7.641719895442311, | |
| "learning_rate": 4.157452119704648e-05, | |
| "loss": 1.2602, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 6.780185758513932, | |
| "grad_norm": 7.401036899860657, | |
| "learning_rate": 4.149191119852891e-05, | |
| "loss": 1.329, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 6.786377708978328, | |
| "grad_norm": 7.09677983300196, | |
| "learning_rate": 4.140932511072201e-05, | |
| "loss": 1.321, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 6.792569659442725, | |
| "grad_norm": 7.277552375894589, | |
| "learning_rate": 4.1326763165721655e-05, | |
| "loss": 1.1842, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 6.798761609907121, | |
| "grad_norm": 8.032866620643269, | |
| "learning_rate": 4.124422559555584e-05, | |
| "loss": 1.383, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 6.804953560371517, | |
| "grad_norm": 6.913712713930272, | |
| "learning_rate": 4.1161712632184045e-05, | |
| "loss": 1.2827, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 6.811145510835914, | |
| "grad_norm": 6.835509230572801, | |
| "learning_rate": 4.10792245074966e-05, | |
| "loss": 1.3363, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 6.817337461300309, | |
| "grad_norm": 7.879506828917882, | |
| "learning_rate": 4.0996761453314056e-05, | |
| "loss": 1.2357, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 6.823529411764706, | |
| "grad_norm": 7.850103476202386, | |
| "learning_rate": 4.0914323701386464e-05, | |
| "loss": 1.2596, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 6.829721362229102, | |
| "grad_norm": 7.300238242699234, | |
| "learning_rate": 4.083191148339283e-05, | |
| "loss": 1.3611, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 6.835913312693498, | |
| "grad_norm": 7.2927400255517165, | |
| "learning_rate": 4.074952503094033e-05, | |
| "loss": 1.2284, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 6.842105263157895, | |
| "grad_norm": 6.723280082966505, | |
| "learning_rate": 4.0667164575563784e-05, | |
| "loss": 1.1778, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 6.848297213622291, | |
| "grad_norm": 6.577268826811694, | |
| "learning_rate": 4.0584830348724935e-05, | |
| "loss": 1.318, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 6.854489164086687, | |
| "grad_norm": 7.463974601182562, | |
| "learning_rate": 4.050252258181177e-05, | |
| "loss": 1.3231, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 6.860681114551084, | |
| "grad_norm": 7.4880776364105275, | |
| "learning_rate": 4.042024150613799e-05, | |
| "loss": 1.3418, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 6.86687306501548, | |
| "grad_norm": 8.416888779323035, | |
| "learning_rate": 4.033798735294224e-05, | |
| "loss": 1.366, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 6.8730650154798765, | |
| "grad_norm": 7.515989995582284, | |
| "learning_rate": 4.025576035338752e-05, | |
| "loss": 1.3318, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 6.879256965944273, | |
| "grad_norm": 6.861478583396406, | |
| "learning_rate": 4.017356073856049e-05, | |
| "loss": 1.2534, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 6.885448916408668, | |
| "grad_norm": 6.68693509128183, | |
| "learning_rate": 4.0091388739470884e-05, | |
| "loss": 1.2903, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 6.891640866873065, | |
| "grad_norm": 8.488756826558117, | |
| "learning_rate": 4.000924458705079e-05, | |
| "loss": 1.4168, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 6.897832817337461, | |
| "grad_norm": 7.461099214702575, | |
| "learning_rate": 3.9927128512154103e-05, | |
| "loss": 1.3652, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 6.904024767801857, | |
| "grad_norm": 7.025634053844339, | |
| "learning_rate": 3.984504074555573e-05, | |
| "loss": 1.2759, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 6.910216718266254, | |
| "grad_norm": 7.076864815181845, | |
| "learning_rate": 3.976298151795107e-05, | |
| "loss": 1.2637, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 6.91640866873065, | |
| "grad_norm": 7.262703330555911, | |
| "learning_rate": 3.968095105995531e-05, | |
| "loss": 1.1953, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 6.922600619195046, | |
| "grad_norm": 7.38167980935885, | |
| "learning_rate": 3.959894960210275e-05, | |
| "loss": 1.3502, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 6.928792569659443, | |
| "grad_norm": 6.487417226403372, | |
| "learning_rate": 3.951697737484625e-05, | |
| "loss": 1.2807, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 6.934984520123839, | |
| "grad_norm": 6.621086820516981, | |
| "learning_rate": 3.9435034608556504e-05, | |
| "loss": 1.1824, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 6.9411764705882355, | |
| "grad_norm": 6.198511111175569, | |
| "learning_rate": 3.935312153352137e-05, | |
| "loss": 1.2067, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 6.947368421052632, | |
| "grad_norm": 7.52708660819256, | |
| "learning_rate": 3.9271238379945283e-05, | |
| "loss": 1.3398, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 6.953560371517028, | |
| "grad_norm": 7.372110340772461, | |
| "learning_rate": 3.918938537794862e-05, | |
| "loss": 1.1414, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 6.959752321981425, | |
| "grad_norm": 7.780518558451204, | |
| "learning_rate": 3.910756275756697e-05, | |
| "loss": 1.2668, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 6.965944272445821, | |
| "grad_norm": 7.463867146831107, | |
| "learning_rate": 3.902577074875061e-05, | |
| "loss": 1.2988, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 6.972136222910216, | |
| "grad_norm": 7.644638123297254, | |
| "learning_rate": 3.8944009581363696e-05, | |
| "loss": 1.3475, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 6.978328173374613, | |
| "grad_norm": 7.504426633437561, | |
| "learning_rate": 3.88622794851838e-05, | |
| "loss": 1.2971, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 6.984520123839009, | |
| "grad_norm": 7.362734099810955, | |
| "learning_rate": 3.878058068990109e-05, | |
| "loss": 1.2548, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 6.9907120743034055, | |
| "grad_norm": 6.620299195182782, | |
| "learning_rate": 3.869891342511782e-05, | |
| "loss": 1.2814, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 6.996904024767802, | |
| "grad_norm": 6.829774476356422, | |
| "learning_rate": 3.8617277920347624e-05, | |
| "loss": 1.3188, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 6.829774476356422, | |
| "learning_rate": 3.853567440501489e-05, | |
| "loss": 0.5846, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 7.006191950464396, | |
| "grad_norm": 5.757452200584078, | |
| "learning_rate": 3.845410310845407e-05, | |
| "loss": 0.7691, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 7.012383900928793, | |
| "grad_norm": 5.3916472112707075, | |
| "learning_rate": 3.8372564259909086e-05, | |
| "loss": 0.7703, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 7.018575851393189, | |
| "grad_norm": 5.659933567827994, | |
| "learning_rate": 3.829105808853269e-05, | |
| "loss": 0.7488, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 7.024767801857585, | |
| "grad_norm": 6.129208627573635, | |
| "learning_rate": 3.820958482338575e-05, | |
| "loss": 0.7068, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 7.030959752321982, | |
| "grad_norm": 6.4799801807011335, | |
| "learning_rate": 3.812814469343674e-05, | |
| "loss": 0.6127, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 7.037151702786378, | |
| "grad_norm": 7.22503344073261, | |
| "learning_rate": 3.8046737927560916e-05, | |
| "loss": 0.7233, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 7.043343653250774, | |
| "grad_norm": 8.578266259964357, | |
| "learning_rate": 3.7965364754539845e-05, | |
| "loss": 0.7226, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 7.04953560371517, | |
| "grad_norm": 8.853781988845313, | |
| "learning_rate": 3.7884025403060635e-05, | |
| "loss": 0.5882, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 7.055727554179566, | |
| "grad_norm": 7.8476497582946365, | |
| "learning_rate": 3.780272010171535e-05, | |
| "loss": 0.6746, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 7.061919504643963, | |
| "grad_norm": 8.091888749091458, | |
| "learning_rate": 3.7721449079000413e-05, | |
| "loss": 0.628, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 7.068111455108359, | |
| "grad_norm": 8.513359984966414, | |
| "learning_rate": 3.7640212563315865e-05, | |
| "loss": 0.6322, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 7.074303405572755, | |
| "grad_norm": 7.089706535286417, | |
| "learning_rate": 3.7559010782964776e-05, | |
| "loss": 0.584, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 7.080495356037152, | |
| "grad_norm": 6.652458662446482, | |
| "learning_rate": 3.747784396615258e-05, | |
| "loss": 0.6509, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 7.086687306501548, | |
| "grad_norm": 7.757592156583572, | |
| "learning_rate": 3.73967123409865e-05, | |
| "loss": 0.6271, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 7.0928792569659445, | |
| "grad_norm": 8.0459839029957, | |
| "learning_rate": 3.7315616135474805e-05, | |
| "loss": 0.6932, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 7.099071207430341, | |
| "grad_norm": 7.747894884862985, | |
| "learning_rate": 3.723455557752628e-05, | |
| "loss": 0.6297, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 7.105263157894737, | |
| "grad_norm": 8.0768419517607, | |
| "learning_rate": 3.715353089494947e-05, | |
| "loss": 0.695, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 7.1114551083591335, | |
| "grad_norm": 8.347655110729336, | |
| "learning_rate": 3.707254231545214e-05, | |
| "loss": 0.7334, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 7.117647058823529, | |
| "grad_norm": 8.610878254087831, | |
| "learning_rate": 3.699159006664056e-05, | |
| "loss": 0.6304, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 7.123839009287925, | |
| "grad_norm": 8.017941711819208, | |
| "learning_rate": 3.691067437601893e-05, | |
| "loss": 0.6681, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 7.130030959752322, | |
| "grad_norm": 7.751879604109213, | |
| "learning_rate": 3.682979547098867e-05, | |
| "loss": 0.592, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 7.136222910216718, | |
| "grad_norm": 7.0682048074196, | |
| "learning_rate": 3.674895357884787e-05, | |
| "loss": 0.6628, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 7.142414860681114, | |
| "grad_norm": 7.524978104619749, | |
| "learning_rate": 3.666814892679056e-05, | |
| "loss": 0.6057, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 7.148606811145511, | |
| "grad_norm": 7.645817083711223, | |
| "learning_rate": 3.6587381741906126e-05, | |
| "loss": 0.6442, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 7.154798761609907, | |
| "grad_norm": 7.607342674234635, | |
| "learning_rate": 3.6506652251178665e-05, | |
| "loss": 0.6556, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 7.1609907120743035, | |
| "grad_norm": 6.670881036041286, | |
| "learning_rate": 3.6425960681486304e-05, | |
| "loss": 0.6234, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 7.1671826625387, | |
| "grad_norm": 7.376001317028536, | |
| "learning_rate": 3.6345307259600655e-05, | |
| "loss": 0.6421, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 7.173374613003096, | |
| "grad_norm": 7.991999140107628, | |
| "learning_rate": 3.62646922121861e-05, | |
| "loss": 0.6461, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 7.179566563467493, | |
| "grad_norm": 7.122464598105445, | |
| "learning_rate": 3.618411576579916e-05, | |
| "loss": 0.5966, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 7.185758513931889, | |
| "grad_norm": 7.935536657371515, | |
| "learning_rate": 3.6103578146887864e-05, | |
| "loss": 0.5976, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 7.191950464396285, | |
| "grad_norm": 8.906340192711372, | |
| "learning_rate": 3.6023079581791166e-05, | |
| "loss": 0.6481, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 7.198142414860681, | |
| "grad_norm": 8.091902630476214, | |
| "learning_rate": 3.594262029673822e-05, | |
| "loss": 0.6261, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 7.204334365325077, | |
| "grad_norm": 8.613601969483307, | |
| "learning_rate": 3.5862200517847826e-05, | |
| "loss": 0.7235, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 7.2105263157894735, | |
| "grad_norm": 8.311992100816648, | |
| "learning_rate": 3.578182047112773e-05, | |
| "loss": 0.6724, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 7.21671826625387, | |
| "grad_norm": 8.307931162702967, | |
| "learning_rate": 3.570148038247404e-05, | |
| "loss": 0.5556, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 7.222910216718266, | |
| "grad_norm": 6.854708876762124, | |
| "learning_rate": 3.562118047767056e-05, | |
| "loss": 0.5809, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 7.2291021671826625, | |
| "grad_norm": 7.807928365862507, | |
| "learning_rate": 3.554092098238811e-05, | |
| "loss": 0.6964, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 7.235294117647059, | |
| "grad_norm": 8.01432971517405, | |
| "learning_rate": 3.5460702122184045e-05, | |
| "loss": 0.6613, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 7.241486068111455, | |
| "grad_norm": 8.324636492816218, | |
| "learning_rate": 3.5380524122501466e-05, | |
| "loss": 0.7325, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 7.247678018575852, | |
| "grad_norm": 8.173897222971299, | |
| "learning_rate": 3.5300387208668636e-05, | |
| "loss": 0.6754, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 7.253869969040248, | |
| "grad_norm": 8.50762012110652, | |
| "learning_rate": 3.5220291605898355e-05, | |
| "loss": 0.662, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 7.260061919504644, | |
| "grad_norm": 8.12257677652454, | |
| "learning_rate": 3.514023753928734e-05, | |
| "loss": 0.654, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 7.266253869969041, | |
| "grad_norm": 8.54175725853606, | |
| "learning_rate": 3.506022523381555e-05, | |
| "loss": 0.5887, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 7.272445820433436, | |
| "grad_norm": 7.299153779760447, | |
| "learning_rate": 3.4980254914345634e-05, | |
| "loss": 0.6315, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 7.2786377708978325, | |
| "grad_norm": 8.270759623026917, | |
| "learning_rate": 3.4900326805622184e-05, | |
| "loss": 0.668, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 7.284829721362229, | |
| "grad_norm": 8.817697433292942, | |
| "learning_rate": 3.482044113227121e-05, | |
| "loss": 0.7288, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 7.291021671826625, | |
| "grad_norm": 7.929767327385549, | |
| "learning_rate": 3.474059811879944e-05, | |
| "loss": 0.7281, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 7.2972136222910216, | |
| "grad_norm": 8.535652364006877, | |
| "learning_rate": 3.4660797989593685e-05, | |
| "loss": 0.7105, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 7.303405572755418, | |
| "grad_norm": 8.356481763272463, | |
| "learning_rate": 3.4581040968920307e-05, | |
| "loss": 0.636, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 7.309597523219814, | |
| "grad_norm": 8.239685126481039, | |
| "learning_rate": 3.450132728092448e-05, | |
| "loss": 0.5836, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 7.315789473684211, | |
| "grad_norm": 8.160510633814823, | |
| "learning_rate": 3.4421657149629595e-05, | |
| "loss": 0.7017, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 7.321981424148607, | |
| "grad_norm": 7.9776807141446975, | |
| "learning_rate": 3.434203079893662e-05, | |
| "loss": 0.6823, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 7.328173374613003, | |
| "grad_norm": 8.82859100339137, | |
| "learning_rate": 3.426244845262351e-05, | |
| "loss": 0.6992, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 7.3343653250774, | |
| "grad_norm": 7.9540922370357965, | |
| "learning_rate": 3.418291033434454e-05, | |
| "loss": 0.6119, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 7.340557275541796, | |
| "grad_norm": 8.399930431271818, | |
| "learning_rate": 3.410341666762971e-05, | |
| "loss": 0.6753, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 7.346749226006192, | |
| "grad_norm": 8.093683857281743, | |
| "learning_rate": 3.4023967675884046e-05, | |
| "loss": 0.6398, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 7.352941176470588, | |
| "grad_norm": 7.701390167163244, | |
| "learning_rate": 3.3944563582387084e-05, | |
| "loss": 0.6885, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 7.359133126934984, | |
| "grad_norm": 7.362680274229581, | |
| "learning_rate": 3.386520461029214e-05, | |
| "loss": 0.6913, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 7.365325077399381, | |
| "grad_norm": 8.383654254826384, | |
| "learning_rate": 3.37858909826257e-05, | |
| "loss": 0.6565, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 7.371517027863777, | |
| "grad_norm": 8.096659536515782, | |
| "learning_rate": 3.37066229222869e-05, | |
| "loss": 0.6613, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 7.377708978328173, | |
| "grad_norm": 8.969915988273861, | |
| "learning_rate": 3.362740065204674e-05, | |
| "loss": 0.6977, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 7.38390092879257, | |
| "grad_norm": 8.00810413870692, | |
| "learning_rate": 3.354822439454756e-05, | |
| "loss": 0.6401, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 7.390092879256966, | |
| "grad_norm": 7.801977933581489, | |
| "learning_rate": 3.346909437230237e-05, | |
| "loss": 0.624, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 7.396284829721362, | |
| "grad_norm": 7.876403238913395, | |
| "learning_rate": 3.3390010807694296e-05, | |
| "loss": 0.6615, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 7.402476780185759, | |
| "grad_norm": 8.311377001905285, | |
| "learning_rate": 3.331097392297582e-05, | |
| "loss": 0.6783, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 7.408668730650155, | |
| "grad_norm": 8.23551557778447, | |
| "learning_rate": 3.323198394026832e-05, | |
| "loss": 0.6823, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 7.414860681114551, | |
| "grad_norm": 7.837471162046131, | |
| "learning_rate": 3.3153041081561295e-05, | |
| "loss": 0.7097, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 7.421052631578947, | |
| "grad_norm": 8.598917363563292, | |
| "learning_rate": 3.307414556871187e-05, | |
| "loss": 0.6873, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 7.427244582043343, | |
| "grad_norm": 8.158249463290781, | |
| "learning_rate": 3.299529762344406e-05, | |
| "loss": 0.5893, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 7.43343653250774, | |
| "grad_norm": 8.313299477526693, | |
| "learning_rate": 3.291649746734821e-05, | |
| "loss": 0.7041, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 7.439628482972136, | |
| "grad_norm": 7.998392093179114, | |
| "learning_rate": 3.283774532188039e-05, | |
| "loss": 0.7022, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 7.445820433436532, | |
| "grad_norm": 7.083629588891991, | |
| "learning_rate": 3.275904140836172e-05, | |
| "loss": 0.6606, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 7.452012383900929, | |
| "grad_norm": 8.31730904563776, | |
| "learning_rate": 3.268038594797777e-05, | |
| "loss": 0.6762, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 7.458204334365325, | |
| "grad_norm": 8.131121047191598, | |
| "learning_rate": 3.260177916177793e-05, | |
| "loss": 0.6383, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 7.464396284829721, | |
| "grad_norm": 7.963816667204574, | |
| "learning_rate": 3.2523221270674845e-05, | |
| "loss": 0.689, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 7.470588235294118, | |
| "grad_norm": 7.721059679347194, | |
| "learning_rate": 3.244471249544366e-05, | |
| "loss": 0.6835, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 7.476780185758514, | |
| "grad_norm": 7.920354396920408, | |
| "learning_rate": 3.236625305672161e-05, | |
| "loss": 0.6929, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 7.4829721362229105, | |
| "grad_norm": 7.674607331785555, | |
| "learning_rate": 3.228784317500716e-05, | |
| "loss": 0.671, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 7.489164086687307, | |
| "grad_norm": 8.495915761327536, | |
| "learning_rate": 3.220948307065959e-05, | |
| "loss": 0.7254, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 7.495356037151703, | |
| "grad_norm": 8.071322310043705, | |
| "learning_rate": 3.213117296389822e-05, | |
| "loss": 0.7481, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 7.5015479876160995, | |
| "grad_norm": 8.612027411012539, | |
| "learning_rate": 3.2052913074801874e-05, | |
| "loss": 0.7564, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 7.507739938080495, | |
| "grad_norm": 8.331997297025653, | |
| "learning_rate": 3.197470362330829e-05, | |
| "loss": 0.6429, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 7.513931888544891, | |
| "grad_norm": 8.400156559319822, | |
| "learning_rate": 3.189654482921344e-05, | |
| "loss": 0.5517, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 7.520123839009288, | |
| "grad_norm": 7.088840525035982, | |
| "learning_rate": 3.181843691217091e-05, | |
| "loss": 0.6208, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 7.526315789473684, | |
| "grad_norm": 8.409192028416426, | |
| "learning_rate": 3.17403800916913e-05, | |
| "loss": 0.6873, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 7.53250773993808, | |
| "grad_norm": 7.838297064321753, | |
| "learning_rate": 3.166237458714165e-05, | |
| "loss": 0.7176, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 7.538699690402477, | |
| "grad_norm": 8.046052845799926, | |
| "learning_rate": 3.158442061774474e-05, | |
| "loss": 0.693, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 7.544891640866873, | |
| "grad_norm": 8.79215963366741, | |
| "learning_rate": 3.150651840257855e-05, | |
| "loss": 0.7204, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 7.5510835913312695, | |
| "grad_norm": 8.744244445066755, | |
| "learning_rate": 3.142866816057559e-05, | |
| "loss": 0.7261, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 7.557275541795666, | |
| "grad_norm": 7.9685194893419595, | |
| "learning_rate": 3.1350870110522346e-05, | |
| "loss": 0.6169, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 7.563467492260062, | |
| "grad_norm": 8.619693891486385, | |
| "learning_rate": 3.1273124471058566e-05, | |
| "loss": 0.7385, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 7.569659442724459, | |
| "grad_norm": 8.54050500452178, | |
| "learning_rate": 3.119543146067675e-05, | |
| "loss": 0.6851, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 7.575851393188854, | |
| "grad_norm": 8.140674406800304, | |
| "learning_rate": 3.111779129772147e-05, | |
| "loss": 0.7533, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 7.58204334365325, | |
| "grad_norm": 7.752084913192541, | |
| "learning_rate": 3.104020420038882e-05, | |
| "loss": 0.6617, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 7.588235294117647, | |
| "grad_norm": 8.202639445478995, | |
| "learning_rate": 3.09626703867257e-05, | |
| "loss": 0.7212, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 7.594427244582043, | |
| "grad_norm": 8.188391370662625, | |
| "learning_rate": 3.08851900746293e-05, | |
| "loss": 0.6522, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 7.6006191950464395, | |
| "grad_norm": 7.66888290973458, | |
| "learning_rate": 3.0807763481846454e-05, | |
| "loss": 0.6824, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 7.606811145510836, | |
| "grad_norm": 8.085980770238914, | |
| "learning_rate": 3.073039082597299e-05, | |
| "loss": 0.6681, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 7.613003095975232, | |
| "grad_norm": 7.7566651992345435, | |
| "learning_rate": 3.065307232445322e-05, | |
| "loss": 0.6784, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 7.6191950464396285, | |
| "grad_norm": 7.978641420587795, | |
| "learning_rate": 3.0575808194579204e-05, | |
| "loss": 0.6736, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 7.625386996904025, | |
| "grad_norm": 8.20959902355321, | |
| "learning_rate": 3.049859865349023e-05, | |
| "loss": 0.7497, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 7.631578947368421, | |
| "grad_norm": 8.616008443473586, | |
| "learning_rate": 3.0421443918172155e-05, | |
| "loss": 0.7485, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 7.637770897832818, | |
| "grad_norm": 8.23181397420113, | |
| "learning_rate": 3.0344344205456803e-05, | |
| "loss": 0.6814, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 7.643962848297214, | |
| "grad_norm": 8.270338931472901, | |
| "learning_rate": 3.0267299732021392e-05, | |
| "loss": 0.6712, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 7.65015479876161, | |
| "grad_norm": 8.47844326229016, | |
| "learning_rate": 3.0190310714387915e-05, | |
| "loss": 0.6584, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 7.656346749226007, | |
| "grad_norm": 7.307865533550394, | |
| "learning_rate": 3.011337736892247e-05, | |
| "loss": 0.6361, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 7.662538699690402, | |
| "grad_norm": 7.992336785797606, | |
| "learning_rate": 3.0036499911834698e-05, | |
| "loss": 0.7118, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 7.6687306501547985, | |
| "grad_norm": 8.545711521285286, | |
| "learning_rate": 2.9959678559177217e-05, | |
| "loss": 0.6709, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 7.674922600619195, | |
| "grad_norm": 7.984466724979602, | |
| "learning_rate": 2.988291352684491e-05, | |
| "loss": 0.6251, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 7.681114551083591, | |
| "grad_norm": 8.118020286806026, | |
| "learning_rate": 2.9806205030574457e-05, | |
| "loss": 0.6173, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 7.687306501547988, | |
| "grad_norm": 7.808414608011233, | |
| "learning_rate": 2.9729553285943583e-05, | |
| "loss": 0.6466, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 7.693498452012384, | |
| "grad_norm": 8.06189837506608, | |
| "learning_rate": 2.9652958508370565e-05, | |
| "loss": 0.6322, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 7.69969040247678, | |
| "grad_norm": 7.26649091945809, | |
| "learning_rate": 2.9576420913113567e-05, | |
| "loss": 0.6161, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 7.705882352941177, | |
| "grad_norm": 8.528822640413676, | |
| "learning_rate": 2.9499940715270025e-05, | |
| "loss": 0.6576, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 7.712074303405573, | |
| "grad_norm": 7.519876903624291, | |
| "learning_rate": 2.9423518129776096e-05, | |
| "loss": 0.5952, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 7.718266253869969, | |
| "grad_norm": 7.771182888891596, | |
| "learning_rate": 2.9347153371406055e-05, | |
| "loss": 0.578, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 7.724458204334366, | |
| "grad_norm": 7.921734486415399, | |
| "learning_rate": 2.9270846654771624e-05, | |
| "loss": 0.674, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 7.730650154798761, | |
| "grad_norm": 8.170468598427377, | |
| "learning_rate": 2.9194598194321377e-05, | |
| "loss": 0.6626, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 7.7368421052631575, | |
| "grad_norm": 8.860381430779642, | |
| "learning_rate": 2.911840820434024e-05, | |
| "loss": 0.6652, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 7.743034055727554, | |
| "grad_norm": 7.712114629805272, | |
| "learning_rate": 2.904227689894875e-05, | |
| "loss": 0.6307, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 7.74922600619195, | |
| "grad_norm": 8.303119371595377, | |
| "learning_rate": 2.8966204492102607e-05, | |
| "loss": 0.582, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 7.755417956656347, | |
| "grad_norm": 8.264302476582474, | |
| "learning_rate": 2.889019119759187e-05, | |
| "loss": 0.6494, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 7.761609907120743, | |
| "grad_norm": 9.05326486888945, | |
| "learning_rate": 2.8814237229040552e-05, | |
| "loss": 0.6544, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 7.767801857585139, | |
| "grad_norm": 8.910884920700793, | |
| "learning_rate": 2.873834279990595e-05, | |
| "loss": 0.6165, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 7.773993808049536, | |
| "grad_norm": 8.13373948747055, | |
| "learning_rate": 2.866250812347795e-05, | |
| "loss": 0.6575, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 7.780185758513932, | |
| "grad_norm": 8.601515291139803, | |
| "learning_rate": 2.8586733412878586e-05, | |
| "loss": 0.6822, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 7.786377708978328, | |
| "grad_norm": 8.046468505636659, | |
| "learning_rate": 2.8511018881061345e-05, | |
| "loss": 0.634, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 7.792569659442725, | |
| "grad_norm": 8.035658308300395, | |
| "learning_rate": 2.8435364740810598e-05, | |
| "loss": 0.6531, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 7.798761609907121, | |
| "grad_norm": 8.101062025927892, | |
| "learning_rate": 2.8359771204741e-05, | |
| "loss": 0.66, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 7.804953560371517, | |
| "grad_norm": 8.38621875259201, | |
| "learning_rate": 2.8284238485296838e-05, | |
| "loss": 0.7224, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 7.811145510835914, | |
| "grad_norm": 9.258934210222563, | |
| "learning_rate": 2.8208766794751516e-05, | |
| "loss": 0.6884, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 7.817337461300309, | |
| "grad_norm": 8.119723433617475, | |
| "learning_rate": 2.8133356345206997e-05, | |
| "loss": 0.6528, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 7.823529411764706, | |
| "grad_norm": 7.350294255838845, | |
| "learning_rate": 2.8058007348593e-05, | |
| "loss": 0.598, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 7.829721362229102, | |
| "grad_norm": 7.419408450013087, | |
| "learning_rate": 2.798272001666663e-05, | |
| "loss": 0.7033, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 7.835913312693498, | |
| "grad_norm": 7.6967343676625255, | |
| "learning_rate": 2.7907494561011693e-05, | |
| "loss": 0.6119, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 7.842105263157895, | |
| "grad_norm": 7.437079870880633, | |
| "learning_rate": 2.7832331193038032e-05, | |
| "loss": 0.6005, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 7.848297213622291, | |
| "grad_norm": 7.558840566026589, | |
| "learning_rate": 2.775723012398107e-05, | |
| "loss": 0.5996, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 7.854489164086687, | |
| "grad_norm": 8.740518115405091, | |
| "learning_rate": 2.768219156490113e-05, | |
| "loss": 0.6713, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 7.860681114551084, | |
| "grad_norm": 8.053879690325598, | |
| "learning_rate": 2.760721572668284e-05, | |
| "loss": 0.6038, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 7.86687306501548, | |
| "grad_norm": 7.7899110456991645, | |
| "learning_rate": 2.7532302820034615e-05, | |
| "loss": 0.6523, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 7.8730650154798765, | |
| "grad_norm": 9.394868216923037, | |
| "learning_rate": 2.7457453055487926e-05, | |
| "loss": 0.6368, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 7.879256965944273, | |
| "grad_norm": 8.943817630593156, | |
| "learning_rate": 2.7382666643396826e-05, | |
| "loss": 0.6528, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 7.885448916408668, | |
| "grad_norm": 8.256568967345226, | |
| "learning_rate": 2.730794379393742e-05, | |
| "loss": 0.6643, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 7.891640866873065, | |
| "grad_norm": 7.967517426684291, | |
| "learning_rate": 2.7233284717107023e-05, | |
| "loss": 0.6318, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 7.897832817337461, | |
| "grad_norm": 9.106426627691587, | |
| "learning_rate": 2.7158689622723815e-05, | |
| "loss": 0.62, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 7.904024767801857, | |
| "grad_norm": 8.626172731573822, | |
| "learning_rate": 2.708415872042618e-05, | |
| "loss": 0.6653, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 7.910216718266254, | |
| "grad_norm": 7.899309432529724, | |
| "learning_rate": 2.7009692219672023e-05, | |
| "loss": 0.6217, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 7.91640866873065, | |
| "grad_norm": 7.437612191892225, | |
| "learning_rate": 2.6935290329738328e-05, | |
| "loss": 0.6785, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 7.922600619195046, | |
| "grad_norm": 7.836176660059616, | |
| "learning_rate": 2.6860953259720472e-05, | |
| "loss": 0.7083, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 7.928792569659443, | |
| "grad_norm": 7.250084926834517, | |
| "learning_rate": 2.678668121853167e-05, | |
| "loss": 0.6753, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 7.934984520123839, | |
| "grad_norm": 8.25845490620734, | |
| "learning_rate": 2.67124744149024e-05, | |
| "loss": 0.596, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 7.9411764705882355, | |
| "grad_norm": 7.65357158563213, | |
| "learning_rate": 2.6638333057379746e-05, | |
| "loss": 0.5703, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 7.947368421052632, | |
| "grad_norm": 7.780251735268891, | |
| "learning_rate": 2.6564257354326914e-05, | |
| "loss": 0.5831, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 7.953560371517028, | |
| "grad_norm": 7.638688702033941, | |
| "learning_rate": 2.6490247513922588e-05, | |
| "loss": 0.6565, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 7.959752321981425, | |
| "grad_norm": 8.272739197943404, | |
| "learning_rate": 2.641630374416036e-05, | |
| "loss": 0.663, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 7.965944272445821, | |
| "grad_norm": 8.09381850289007, | |
| "learning_rate": 2.634242625284812e-05, | |
| "loss": 0.6729, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 7.972136222910216, | |
| "grad_norm": 8.327620204390481, | |
| "learning_rate": 2.626861524760753e-05, | |
| "loss": 0.6725, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 7.978328173374613, | |
| "grad_norm": 8.199164428109075, | |
| "learning_rate": 2.6194870935873337e-05, | |
| "loss": 0.6722, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 7.984520123839009, | |
| "grad_norm": 8.09738085151205, | |
| "learning_rate": 2.6121193524892918e-05, | |
| "loss": 0.6153, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 7.9907120743034055, | |
| "grad_norm": 8.716955283383768, | |
| "learning_rate": 2.6047583221725614e-05, | |
| "loss": 0.7093, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 7.996904024767802, | |
| "grad_norm": 9.226801772932648, | |
| "learning_rate": 2.597404023324217e-05, | |
| "loss": 0.678, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 8.387274641654235, | |
| "learning_rate": 2.5900564766124162e-05, | |
| "loss": 0.2941, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 8.006191950464396, | |
| "grad_norm": 5.211729588648789, | |
| "learning_rate": 2.582715702686337e-05, | |
| "loss": 0.2631, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 8.012383900928793, | |
| "grad_norm": 5.90311339456378, | |
| "learning_rate": 2.575381722176128e-05, | |
| "loss": 0.272, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 8.018575851393189, | |
| "grad_norm": 4.971151141966583, | |
| "learning_rate": 2.5680545556928438e-05, | |
| "loss": 0.2773, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 8.024767801857585, | |
| "grad_norm": 5.6081242166562895, | |
| "learning_rate": 2.5607342238283903e-05, | |
| "loss": 0.2813, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 8.030959752321982, | |
| "grad_norm": 5.260206176471535, | |
| "learning_rate": 2.5534207471554646e-05, | |
| "loss": 0.2712, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 8.037151702786378, | |
| "grad_norm": 5.4508788974381535, | |
| "learning_rate": 2.5461141462275002e-05, | |
| "loss": 0.2218, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 8.043343653250774, | |
| "grad_norm": 6.0916412994862155, | |
| "learning_rate": 2.5388144415786026e-05, | |
| "loss": 0.258, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 8.04953560371517, | |
| "grad_norm": 5.9748079893438675, | |
| "learning_rate": 2.531521653723501e-05, | |
| "loss": 0.2532, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 8.055727554179567, | |
| "grad_norm": 6.449372417182895, | |
| "learning_rate": 2.5242358031574853e-05, | |
| "loss": 0.2116, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 8.061919504643964, | |
| "grad_norm": 5.822373664510092, | |
| "learning_rate": 2.5169569103563485e-05, | |
| "loss": 0.2504, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 8.06811145510836, | |
| "grad_norm": 6.606672924694336, | |
| "learning_rate": 2.5096849957763292e-05, | |
| "loss": 0.2287, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 8.074303405572756, | |
| "grad_norm": 6.5527604451433925, | |
| "learning_rate": 2.5024200798540575e-05, | |
| "loss": 0.2291, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 8.08049535603715, | |
| "grad_norm": 7.954142111358529, | |
| "learning_rate": 2.4951621830064886e-05, | |
| "loss": 0.2777, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 8.086687306501547, | |
| "grad_norm": 6.50698468501565, | |
| "learning_rate": 2.4879113256308584e-05, | |
| "loss": 0.2005, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 8.092879256965944, | |
| "grad_norm": 6.98129438122496, | |
| "learning_rate": 2.4806675281046165e-05, | |
| "loss": 0.2766, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 8.09907120743034, | |
| "grad_norm": 6.951573545459006, | |
| "learning_rate": 2.473430810785372e-05, | |
| "loss": 0.2362, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 8.105263157894736, | |
| "grad_norm": 6.40999249025508, | |
| "learning_rate": 2.4662011940108382e-05, | |
| "loss": 0.1954, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 8.111455108359133, | |
| "grad_norm": 6.203154234738614, | |
| "learning_rate": 2.4589786980987672e-05, | |
| "loss": 0.2153, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 8.117647058823529, | |
| "grad_norm": 5.763352677386173, | |
| "learning_rate": 2.451763343346906e-05, | |
| "loss": 0.2, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 8.123839009287925, | |
| "grad_norm": 6.877662577956943, | |
| "learning_rate": 2.444555150032929e-05, | |
| "loss": 0.2158, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 8.130030959752322, | |
| "grad_norm": 7.234338181496438, | |
| "learning_rate": 2.437354138414385e-05, | |
| "loss": 0.1984, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 8.136222910216718, | |
| "grad_norm": 6.347476827570805, | |
| "learning_rate": 2.4301603287286402e-05, | |
| "loss": 0.231, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 8.142414860681114, | |
| "grad_norm": 7.80579664610214, | |
| "learning_rate": 2.4229737411928223e-05, | |
| "loss": 0.2432, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 8.14860681114551, | |
| "grad_norm": 6.238879965547669, | |
| "learning_rate": 2.415794396003756e-05, | |
| "loss": 0.2232, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 8.154798761609907, | |
| "grad_norm": 6.607933580001818, | |
| "learning_rate": 2.4086223133379198e-05, | |
| "loss": 0.2415, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 8.160990712074303, | |
| "grad_norm": 6.673368860084464, | |
| "learning_rate": 2.4014575133513782e-05, | |
| "loss": 0.2437, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 8.1671826625387, | |
| "grad_norm": 6.892100326398936, | |
| "learning_rate": 2.39430001617973e-05, | |
| "loss": 0.2335, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 8.173374613003096, | |
| "grad_norm": 6.726872628726199, | |
| "learning_rate": 2.387149841938052e-05, | |
| "loss": 0.2153, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 8.179566563467493, | |
| "grad_norm": 6.812053411668685, | |
| "learning_rate": 2.3800070107208354e-05, | |
| "loss": 0.2419, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 8.185758513931889, | |
| "grad_norm": 7.3739792106488125, | |
| "learning_rate": 2.3728715426019388e-05, | |
| "loss": 0.2439, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 8.191950464396285, | |
| "grad_norm": 5.674140988921804, | |
| "learning_rate": 2.3657434576345332e-05, | |
| "loss": 0.2188, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 8.198142414860682, | |
| "grad_norm": 7.459872374663674, | |
| "learning_rate": 2.3586227758510298e-05, | |
| "loss": 0.2584, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 8.204334365325078, | |
| "grad_norm": 7.645791139054171, | |
| "learning_rate": 2.3515095172630408e-05, | |
| "loss": 0.2019, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 8.210526315789474, | |
| "grad_norm": 6.53752820108343, | |
| "learning_rate": 2.3444037018613175e-05, | |
| "loss": 0.2231, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 8.21671826625387, | |
| "grad_norm": 6.062725182200797, | |
| "learning_rate": 2.3373053496156867e-05, | |
| "loss": 0.2144, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 8.222910216718267, | |
| "grad_norm": 6.492996483333465, | |
| "learning_rate": 2.3302144804750074e-05, | |
| "loss": 0.2034, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 8.229102167182663, | |
| "grad_norm": 6.512855794322941, | |
| "learning_rate": 2.3231311143671075e-05, | |
| "loss": 0.2249, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 8.235294117647058, | |
| "grad_norm": 6.768017013410216, | |
| "learning_rate": 2.3160552711987264e-05, | |
| "loss": 0.2389, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 8.241486068111454, | |
| "grad_norm": 7.0049409880668705, | |
| "learning_rate": 2.3089869708554657e-05, | |
| "loss": 0.249, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 8.24767801857585, | |
| "grad_norm": 7.129557311334126, | |
| "learning_rate": 2.301926233201721e-05, | |
| "loss": 0.2301, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 8.253869969040247, | |
| "grad_norm": 6.815250387575155, | |
| "learning_rate": 2.2948730780806404e-05, | |
| "loss": 0.221, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 8.260061919504643, | |
| "grad_norm": 7.607829590891377, | |
| "learning_rate": 2.2878275253140664e-05, | |
| "loss": 0.249, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 8.26625386996904, | |
| "grad_norm": 5.606348163292512, | |
| "learning_rate": 2.2807895947024643e-05, | |
| "loss": 0.1852, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 8.272445820433436, | |
| "grad_norm": 7.122673389168984, | |
| "learning_rate": 2.2737593060248886e-05, | |
| "loss": 0.2094, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 8.278637770897832, | |
| "grad_norm": 6.691918362772205, | |
| "learning_rate": 2.266736679038915e-05, | |
| "loss": 0.1943, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 8.284829721362229, | |
| "grad_norm": 6.213499448759762, | |
| "learning_rate": 2.2597217334805816e-05, | |
| "loss": 0.2415, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 8.291021671826625, | |
| "grad_norm": 6.523759773656105, | |
| "learning_rate": 2.2527144890643465e-05, | |
| "loss": 0.2133, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 8.297213622291022, | |
| "grad_norm": 7.243851567114579, | |
| "learning_rate": 2.2457149654830207e-05, | |
| "loss": 0.24, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 8.303405572755418, | |
| "grad_norm": 6.073498216754716, | |
| "learning_rate": 2.238723182407719e-05, | |
| "loss": 0.1831, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 8.309597523219814, | |
| "grad_norm": 6.7832682484635, | |
| "learning_rate": 2.2317391594878028e-05, | |
| "loss": 0.2326, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 8.31578947368421, | |
| "grad_norm": 6.861708354215309, | |
| "learning_rate": 2.2247629163508204e-05, | |
| "loss": 0.2261, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 8.321981424148607, | |
| "grad_norm": 5.755130687763803, | |
| "learning_rate": 2.2177944726024608e-05, | |
| "loss": 0.2013, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 8.328173374613003, | |
| "grad_norm": 6.310832057848415, | |
| "learning_rate": 2.2108338478264933e-05, | |
| "loss": 0.2022, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 8.3343653250774, | |
| "grad_norm": 7.052846963494786, | |
| "learning_rate": 2.2038810615847116e-05, | |
| "loss": 0.2329, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 8.340557275541796, | |
| "grad_norm": 7.319191156822584, | |
| "learning_rate": 2.196936133416882e-05, | |
| "loss": 0.2554, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 8.346749226006192, | |
| "grad_norm": 6.439710870985042, | |
| "learning_rate": 2.1899990828406875e-05, | |
| "loss": 0.219, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 8.352941176470589, | |
| "grad_norm": 8.63536798331795, | |
| "learning_rate": 2.1830699293516677e-05, | |
| "loss": 0.2386, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 8.359133126934985, | |
| "grad_norm": 6.487908225097029, | |
| "learning_rate": 2.1761486924231727e-05, | |
| "loss": 0.2433, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 8.365325077399381, | |
| "grad_norm": 7.390858801166949, | |
| "learning_rate": 2.1692353915063046e-05, | |
| "loss": 0.2371, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 8.371517027863778, | |
| "grad_norm": 6.47598503313347, | |
| "learning_rate": 2.16233004602986e-05, | |
| "loss": 0.2029, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 8.377708978328174, | |
| "grad_norm": 6.2867756845983145, | |
| "learning_rate": 2.155432675400283e-05, | |
| "loss": 0.2472, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 8.38390092879257, | |
| "grad_norm": 6.006656853265457, | |
| "learning_rate": 2.1485432990015958e-05, | |
| "loss": 0.248, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 8.390092879256965, | |
| "grad_norm": 6.6881962837345865, | |
| "learning_rate": 2.1416619361953637e-05, | |
| "loss": 0.2172, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 8.396284829721361, | |
| "grad_norm": 6.157484880815013, | |
| "learning_rate": 2.1347886063206267e-05, | |
| "loss": 0.2035, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 8.402476780185758, | |
| "grad_norm": 7.363173374657094, | |
| "learning_rate": 2.1279233286938504e-05, | |
| "loss": 0.1999, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 8.408668730650154, | |
| "grad_norm": 7.096607612479734, | |
| "learning_rate": 2.1210661226088703e-05, | |
| "loss": 0.2081, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 8.41486068111455, | |
| "grad_norm": 6.308380615184922, | |
| "learning_rate": 2.1142170073368395e-05, | |
| "loss": 0.1943, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 8.421052631578947, | |
| "grad_norm": 8.152189818300819, | |
| "learning_rate": 2.1073760021261684e-05, | |
| "loss": 0.244, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 8.427244582043343, | |
| "grad_norm": 5.708244269346329, | |
| "learning_rate": 2.100543126202481e-05, | |
| "loss": 0.182, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 8.43343653250774, | |
| "grad_norm": 5.831920283298659, | |
| "learning_rate": 2.0937183987685526e-05, | |
| "loss": 0.2175, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 8.439628482972136, | |
| "grad_norm": 6.42210420654083, | |
| "learning_rate": 2.0869018390042588e-05, | |
| "loss": 0.2242, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 8.445820433436532, | |
| "grad_norm": 6.980364372889547, | |
| "learning_rate": 2.0800934660665223e-05, | |
| "loss": 0.2227, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 8.452012383900929, | |
| "grad_norm": 6.210489409979137, | |
| "learning_rate": 2.073293299089253e-05, | |
| "loss": 0.2499, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 8.458204334365325, | |
| "grad_norm": 5.795459108999961, | |
| "learning_rate": 2.066501357183305e-05, | |
| "loss": 0.1959, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 8.464396284829721, | |
| "grad_norm": 7.284135854418629, | |
| "learning_rate": 2.0597176594364148e-05, | |
| "loss": 0.2239, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 8.470588235294118, | |
| "grad_norm": 8.90348810962731, | |
| "learning_rate": 2.0529422249131496e-05, | |
| "loss": 0.2376, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 8.476780185758514, | |
| "grad_norm": 7.866093620555389, | |
| "learning_rate": 2.0461750726548556e-05, | |
| "loss": 0.2203, | |
| "step": 1373 | |
| }, | |
| { | |
| "epoch": 8.48297213622291, | |
| "grad_norm": 6.599369984837879, | |
| "learning_rate": 2.0394162216796024e-05, | |
| "loss": 0.1969, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 8.489164086687307, | |
| "grad_norm": 6.975347206985036, | |
| "learning_rate": 2.0326656909821263e-05, | |
| "loss": 0.2117, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 8.495356037151703, | |
| "grad_norm": 6.442740030001861, | |
| "learning_rate": 2.0259234995337863e-05, | |
| "loss": 0.1838, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 8.5015479876161, | |
| "grad_norm": 5.373775430255688, | |
| "learning_rate": 2.019189666282501e-05, | |
| "loss": 0.1809, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 8.507739938080496, | |
| "grad_norm": 7.150905800287018, | |
| "learning_rate": 2.0124642101527035e-05, | |
| "loss": 0.2204, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 8.513931888544892, | |
| "grad_norm": 7.521629447519654, | |
| "learning_rate": 2.0057471500452825e-05, | |
| "loss": 0.2018, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 8.520123839009289, | |
| "grad_norm": 6.388463961172899, | |
| "learning_rate": 1.9990385048375278e-05, | |
| "loss": 0.2191, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 8.526315789473685, | |
| "grad_norm": 6.512424992268914, | |
| "learning_rate": 1.9923382933830836e-05, | |
| "loss": 0.2092, | |
| "step": 1381 | |
| }, | |
| { | |
| "epoch": 8.532507739938081, | |
| "grad_norm": 6.852904088515052, | |
| "learning_rate": 1.9856465345118925e-05, | |
| "loss": 0.2308, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 8.538699690402478, | |
| "grad_norm": 5.980947731318695, | |
| "learning_rate": 1.9789632470301422e-05, | |
| "loss": 0.1847, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 8.544891640866872, | |
| "grad_norm": 6.324483999200606, | |
| "learning_rate": 1.9722884497202116e-05, | |
| "loss": 0.1987, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 8.551083591331269, | |
| "grad_norm": 8.030618441440813, | |
| "learning_rate": 1.9656221613406218e-05, | |
| "loss": 0.2644, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 8.557275541795665, | |
| "grad_norm": 6.5345265520961435, | |
| "learning_rate": 1.9589644006259756e-05, | |
| "loss": 0.2416, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 8.563467492260061, | |
| "grad_norm": 6.397613852570123, | |
| "learning_rate": 1.952315186286915e-05, | |
| "loss": 0.222, | |
| "step": 1387 | |
| }, | |
| { | |
| "epoch": 8.569659442724458, | |
| "grad_norm": 7.04089489831636, | |
| "learning_rate": 1.945674537010062e-05, | |
| "loss": 0.2027, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 8.575851393188854, | |
| "grad_norm": 6.97787087592902, | |
| "learning_rate": 1.939042471457968e-05, | |
| "loss": 0.2201, | |
| "step": 1389 | |
| }, | |
| { | |
| "epoch": 8.58204334365325, | |
| "grad_norm": 6.793799389798548, | |
| "learning_rate": 1.9324190082690636e-05, | |
| "loss": 0.2109, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 8.588235294117647, | |
| "grad_norm": 5.469745216879858, | |
| "learning_rate": 1.925804166057596e-05, | |
| "loss": 0.1942, | |
| "step": 1391 | |
| }, | |
| { | |
| "epoch": 8.594427244582043, | |
| "grad_norm": 6.219880888435084, | |
| "learning_rate": 1.9191979634135927e-05, | |
| "loss": 0.2339, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 8.60061919504644, | |
| "grad_norm": 5.747634765720123, | |
| "learning_rate": 1.9126004189027973e-05, | |
| "loss": 0.1637, | |
| "step": 1393 | |
| }, | |
| { | |
| "epoch": 8.606811145510836, | |
| "grad_norm": 4.803150560563886, | |
| "learning_rate": 1.906011551066622e-05, | |
| "loss": 0.1709, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 8.613003095975232, | |
| "grad_norm": 6.09335819477407, | |
| "learning_rate": 1.8994313784220942e-05, | |
| "loss": 0.2319, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 8.619195046439629, | |
| "grad_norm": 6.380835466587691, | |
| "learning_rate": 1.8928599194618052e-05, | |
| "loss": 0.2321, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 8.625386996904025, | |
| "grad_norm": 6.69520005892915, | |
| "learning_rate": 1.8862971926538553e-05, | |
| "loss": 0.2249, | |
| "step": 1397 | |
| }, | |
| { | |
| "epoch": 8.631578947368421, | |
| "grad_norm": 6.22365941945241, | |
| "learning_rate": 1.8797432164418072e-05, | |
| "loss": 0.1636, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 8.637770897832818, | |
| "grad_norm": 7.112655886617888, | |
| "learning_rate": 1.8731980092446306e-05, | |
| "loss": 0.2064, | |
| "step": 1399 | |
| }, | |
| { | |
| "epoch": 8.643962848297214, | |
| "grad_norm": 7.141282558450403, | |
| "learning_rate": 1.8666615894566515e-05, | |
| "loss": 0.2132, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 8.65015479876161, | |
| "grad_norm": 5.79129211335436, | |
| "learning_rate": 1.8601339754475005e-05, | |
| "loss": 0.1898, | |
| "step": 1401 | |
| }, | |
| { | |
| "epoch": 8.656346749226007, | |
| "grad_norm": 7.147248850656452, | |
| "learning_rate": 1.853615185562058e-05, | |
| "loss": 0.2155, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 8.662538699690403, | |
| "grad_norm": 6.414338629491582, | |
| "learning_rate": 1.847105238120409e-05, | |
| "loss": 0.1884, | |
| "step": 1403 | |
| }, | |
| { | |
| "epoch": 8.6687306501548, | |
| "grad_norm": 7.722688748172245, | |
| "learning_rate": 1.8406041514177868e-05, | |
| "loss": 0.2542, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 8.674922600619196, | |
| "grad_norm": 5.937274557351227, | |
| "learning_rate": 1.834111943724523e-05, | |
| "loss": 0.1901, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 8.681114551083592, | |
| "grad_norm": 6.007815234092331, | |
| "learning_rate": 1.827628633285996e-05, | |
| "loss": 0.1936, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 8.687306501547988, | |
| "grad_norm": 7.1847410434373975, | |
| "learning_rate": 1.821154238322581e-05, | |
| "loss": 0.2595, | |
| "step": 1407 | |
| }, | |
| { | |
| "epoch": 8.693498452012385, | |
| "grad_norm": 7.077045758363068, | |
| "learning_rate": 1.8146887770295938e-05, | |
| "loss": 0.1876, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 8.69969040247678, | |
| "grad_norm": 7.12292958517752, | |
| "learning_rate": 1.808232267577248e-05, | |
| "loss": 0.2041, | |
| "step": 1409 | |
| }, | |
| { | |
| "epoch": 8.705882352941176, | |
| "grad_norm": 6.1739975800363505, | |
| "learning_rate": 1.801784728110596e-05, | |
| "loss": 0.2048, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 8.712074303405572, | |
| "grad_norm": 7.536190701947081, | |
| "learning_rate": 1.795346176749484e-05, | |
| "loss": 0.2403, | |
| "step": 1411 | |
| }, | |
| { | |
| "epoch": 8.718266253869968, | |
| "grad_norm": 6.304015603478306, | |
| "learning_rate": 1.7889166315884975e-05, | |
| "loss": 0.1865, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 8.724458204334365, | |
| "grad_norm": 7.579468960849294, | |
| "learning_rate": 1.7824961106969124e-05, | |
| "loss": 0.2214, | |
| "step": 1413 | |
| }, | |
| { | |
| "epoch": 8.730650154798761, | |
| "grad_norm": 7.438004355915212, | |
| "learning_rate": 1.7760846321186386e-05, | |
| "loss": 0.2221, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 8.736842105263158, | |
| "grad_norm": 7.413131864673324, | |
| "learning_rate": 1.7696822138721798e-05, | |
| "loss": 0.1988, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 8.743034055727554, | |
| "grad_norm": 6.689688611213779, | |
| "learning_rate": 1.7632888739505742e-05, | |
| "loss": 0.1904, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 8.74922600619195, | |
| "grad_norm": 6.985013462114578, | |
| "learning_rate": 1.756904630321347e-05, | |
| "loss": 0.217, | |
| "step": 1417 | |
| }, | |
| { | |
| "epoch": 8.755417956656347, | |
| "grad_norm": 7.116843076668186, | |
| "learning_rate": 1.7505295009264616e-05, | |
| "loss": 0.229, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 8.761609907120743, | |
| "grad_norm": 6.850052751434371, | |
| "learning_rate": 1.7441635036822624e-05, | |
| "loss": 0.2162, | |
| "step": 1419 | |
| }, | |
| { | |
| "epoch": 8.76780185758514, | |
| "grad_norm": 5.339204147705435, | |
| "learning_rate": 1.737806656479433e-05, | |
| "loss": 0.1688, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 8.773993808049536, | |
| "grad_norm": 5.908151183271524, | |
| "learning_rate": 1.7314589771829427e-05, | |
| "loss": 0.1885, | |
| "step": 1421 | |
| }, | |
| { | |
| "epoch": 8.780185758513932, | |
| "grad_norm": 6.000434459305432, | |
| "learning_rate": 1.7251204836319935e-05, | |
| "loss": 0.1856, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 8.786377708978328, | |
| "grad_norm": 6.598456106670304, | |
| "learning_rate": 1.718791193639973e-05, | |
| "loss": 0.1943, | |
| "step": 1423 | |
| }, | |
| { | |
| "epoch": 8.792569659442725, | |
| "grad_norm": 6.881360078803345, | |
| "learning_rate": 1.712471124994407e-05, | |
| "loss": 0.2141, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 8.798761609907121, | |
| "grad_norm": 6.513661403234104, | |
| "learning_rate": 1.706160295456898e-05, | |
| "loss": 0.1744, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 8.804953560371517, | |
| "grad_norm": 7.090429945824626, | |
| "learning_rate": 1.6998587227630904e-05, | |
| "loss": 0.2304, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 8.811145510835914, | |
| "grad_norm": 5.873530151320455, | |
| "learning_rate": 1.6935664246226118e-05, | |
| "loss": 0.2002, | |
| "step": 1427 | |
| }, | |
| { | |
| "epoch": 8.81733746130031, | |
| "grad_norm": 6.590128634706223, | |
| "learning_rate": 1.6872834187190245e-05, | |
| "loss": 0.2246, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 8.823529411764707, | |
| "grad_norm": 5.970723590237332, | |
| "learning_rate": 1.6810097227097782e-05, | |
| "loss": 0.1959, | |
| "step": 1429 | |
| }, | |
| { | |
| "epoch": 8.829721362229103, | |
| "grad_norm": 6.0325861218360615, | |
| "learning_rate": 1.6747453542261538e-05, | |
| "loss": 0.1852, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 8.8359133126935, | |
| "grad_norm": 6.203393082510939, | |
| "learning_rate": 1.6684903308732232e-05, | |
| "loss": 0.208, | |
| "step": 1431 | |
| }, | |
| { | |
| "epoch": 8.842105263157894, | |
| "grad_norm": 6.037468114687763, | |
| "learning_rate": 1.6622446702297935e-05, | |
| "loss": 0.208, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 8.848297213622292, | |
| "grad_norm": 5.4456839372955494, | |
| "learning_rate": 1.6560083898483596e-05, | |
| "loss": 0.1771, | |
| "step": 1433 | |
| }, | |
| { | |
| "epoch": 8.854489164086687, | |
| "grad_norm": 6.83245124590101, | |
| "learning_rate": 1.6497815072550555e-05, | |
| "loss": 0.2057, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 8.860681114551083, | |
| "grad_norm": 5.735714387101287, | |
| "learning_rate": 1.6435640399496034e-05, | |
| "loss": 0.1699, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 8.86687306501548, | |
| "grad_norm": 7.546965433664666, | |
| "learning_rate": 1.6373560054052627e-05, | |
| "loss": 0.1851, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 8.873065015479876, | |
| "grad_norm": 5.8595469659092565, | |
| "learning_rate": 1.6311574210687862e-05, | |
| "loss": 0.1618, | |
| "step": 1437 | |
| }, | |
| { | |
| "epoch": 8.879256965944272, | |
| "grad_norm": 6.0645864067771615, | |
| "learning_rate": 1.6249683043603696e-05, | |
| "loss": 0.1613, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 8.885448916408668, | |
| "grad_norm": 6.816007484225182, | |
| "learning_rate": 1.618788672673598e-05, | |
| "loss": 0.2141, | |
| "step": 1439 | |
| }, | |
| { | |
| "epoch": 8.891640866873065, | |
| "grad_norm": 6.490848833243287, | |
| "learning_rate": 1.6126185433754033e-05, | |
| "loss": 0.1743, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 8.897832817337461, | |
| "grad_norm": 6.304776051555149, | |
| "learning_rate": 1.6064579338060087e-05, | |
| "loss": 0.2084, | |
| "step": 1441 | |
| }, | |
| { | |
| "epoch": 8.904024767801857, | |
| "grad_norm": 6.992876586920148, | |
| "learning_rate": 1.6003068612788864e-05, | |
| "loss": 0.1885, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 8.910216718266254, | |
| "grad_norm": 6.879469286384281, | |
| "learning_rate": 1.594165343080705e-05, | |
| "loss": 0.2046, | |
| "step": 1443 | |
| }, | |
| { | |
| "epoch": 8.91640866873065, | |
| "grad_norm": 7.295873180238467, | |
| "learning_rate": 1.588033396471284e-05, | |
| "loss": 0.2236, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 8.922600619195046, | |
| "grad_norm": 5.737170436178231, | |
| "learning_rate": 1.5819110386835413e-05, | |
| "loss": 0.1985, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 8.928792569659443, | |
| "grad_norm": 5.2389320863854865, | |
| "learning_rate": 1.575798286923449e-05, | |
| "loss": 0.1792, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 8.93498452012384, | |
| "grad_norm": 7.020547241770886, | |
| "learning_rate": 1.5696951583699777e-05, | |
| "loss": 0.1968, | |
| "step": 1447 | |
| }, | |
| { | |
| "epoch": 8.941176470588236, | |
| "grad_norm": 6.067220274895548, | |
| "learning_rate": 1.56360167017506e-05, | |
| "loss": 0.2032, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 8.947368421052632, | |
| "grad_norm": 6.526140400640363, | |
| "learning_rate": 1.5575178394635316e-05, | |
| "loss": 0.1789, | |
| "step": 1449 | |
| }, | |
| { | |
| "epoch": 8.953560371517028, | |
| "grad_norm": 6.8961109487146, | |
| "learning_rate": 1.5514436833330892e-05, | |
| "loss": 0.2225, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 8.959752321981425, | |
| "grad_norm": 5.92248307884942, | |
| "learning_rate": 1.545379218854241e-05, | |
| "loss": 0.1735, | |
| "step": 1451 | |
| }, | |
| { | |
| "epoch": 8.965944272445821, | |
| "grad_norm": 6.033455110902586, | |
| "learning_rate": 1.5393244630702542e-05, | |
| "loss": 0.1866, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 8.972136222910217, | |
| "grad_norm": 5.815481467106758, | |
| "learning_rate": 1.5332794329971155e-05, | |
| "loss": 0.1862, | |
| "step": 1453 | |
| }, | |
| { | |
| "epoch": 8.978328173374614, | |
| "grad_norm": 6.187467776033898, | |
| "learning_rate": 1.5272441456234783e-05, | |
| "loss": 0.1646, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 8.98452012383901, | |
| "grad_norm": 6.001636770763615, | |
| "learning_rate": 1.5212186179106141e-05, | |
| "loss": 0.1628, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 8.990712074303406, | |
| "grad_norm": 6.427541654539285, | |
| "learning_rate": 1.5152028667923668e-05, | |
| "loss": 0.1953, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 8.996904024767801, | |
| "grad_norm": 6.26902193898391, | |
| "learning_rate": 1.5091969091751074e-05, | |
| "loss": 0.1708, | |
| "step": 1457 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 6.26902193898391, | |
| "learning_rate": 1.503200761937677e-05, | |
| "loss": 0.0859, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 9.006191950464396, | |
| "grad_norm": 4.71081559874955, | |
| "learning_rate": 1.4972144419313527e-05, | |
| "loss": 0.0635, | |
| "step": 1459 | |
| }, | |
| { | |
| "epoch": 9.012383900928793, | |
| "grad_norm": 2.650543963787472, | |
| "learning_rate": 1.4912379659797909e-05, | |
| "loss": 0.0683, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 9.018575851393189, | |
| "grad_norm": 3.872097740667003, | |
| "learning_rate": 1.4852713508789833e-05, | |
| "loss": 0.0521, | |
| "step": 1461 | |
| }, | |
| { | |
| "epoch": 9.024767801857585, | |
| "grad_norm": 3.016760985315231, | |
| "learning_rate": 1.4793146133972108e-05, | |
| "loss": 0.0805, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 9.030959752321982, | |
| "grad_norm": 2.6210030999981724, | |
| "learning_rate": 1.4733677702749893e-05, | |
| "loss": 0.0445, | |
| "step": 1463 | |
| }, | |
| { | |
| "epoch": 9.037151702786378, | |
| "grad_norm": 2.7800323781665552, | |
| "learning_rate": 1.4674308382250334e-05, | |
| "loss": 0.0571, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 9.043343653250774, | |
| "grad_norm": 3.093182907029037, | |
| "learning_rate": 1.4615038339322024e-05, | |
| "loss": 0.0727, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 9.04953560371517, | |
| "grad_norm": 5.291876382279551, | |
| "learning_rate": 1.4555867740534552e-05, | |
| "loss": 0.0537, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 9.055727554179567, | |
| "grad_norm": 3.151990570075643, | |
| "learning_rate": 1.4496796752178033e-05, | |
| "loss": 0.0639, | |
| "step": 1467 | |
| }, | |
| { | |
| "epoch": 9.061919504643964, | |
| "grad_norm": 3.217456007131016, | |
| "learning_rate": 1.4437825540262661e-05, | |
| "loss": 0.0498, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 9.06811145510836, | |
| "grad_norm": 3.2934700680884474, | |
| "learning_rate": 1.437895427051817e-05, | |
| "loss": 0.0535, | |
| "step": 1469 | |
| }, | |
| { | |
| "epoch": 9.074303405572756, | |
| "grad_norm": 3.685437107993493, | |
| "learning_rate": 1.4320183108393465e-05, | |
| "loss": 0.0638, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 9.08049535603715, | |
| "grad_norm": 4.129874568835661, | |
| "learning_rate": 1.4261512219056117e-05, | |
| "loss": 0.0592, | |
| "step": 1471 | |
| }, | |
| { | |
| "epoch": 9.086687306501547, | |
| "grad_norm": 3.600504013809219, | |
| "learning_rate": 1.420294176739188e-05, | |
| "loss": 0.0565, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 9.092879256965944, | |
| "grad_norm": 3.7328453732235576, | |
| "learning_rate": 1.4144471918004254e-05, | |
| "loss": 0.0596, | |
| "step": 1473 | |
| }, | |
| { | |
| "epoch": 9.09907120743034, | |
| "grad_norm": 4.263048430319195, | |
| "learning_rate": 1.4086102835213977e-05, | |
| "loss": 0.0651, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 9.105263157894736, | |
| "grad_norm": 3.7784042882751634, | |
| "learning_rate": 1.4027834683058639e-05, | |
| "loss": 0.0535, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 9.111455108359133, | |
| "grad_norm": 3.1593539758220412, | |
| "learning_rate": 1.3969667625292165e-05, | |
| "loss": 0.0478, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 9.117647058823529, | |
| "grad_norm": 3.719540430139627, | |
| "learning_rate": 1.3911601825384368e-05, | |
| "loss": 0.058, | |
| "step": 1477 | |
| }, | |
| { | |
| "epoch": 9.123839009287925, | |
| "grad_norm": 3.519368419406696, | |
| "learning_rate": 1.385363744652049e-05, | |
| "loss": 0.0467, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 9.130030959752322, | |
| "grad_norm": 3.0737082235214945, | |
| "learning_rate": 1.3795774651600757e-05, | |
| "loss": 0.0544, | |
| "step": 1479 | |
| }, | |
| { | |
| "epoch": 9.136222910216718, | |
| "grad_norm": 3.5400720534438386, | |
| "learning_rate": 1.373801360323988e-05, | |
| "loss": 0.0581, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 9.142414860681114, | |
| "grad_norm": 2.523122843907356, | |
| "learning_rate": 1.3680354463766642e-05, | |
| "loss": 0.0408, | |
| "step": 1481 | |
| }, | |
| { | |
| "epoch": 9.14860681114551, | |
| "grad_norm": 3.003305768858099, | |
| "learning_rate": 1.3622797395223442e-05, | |
| "loss": 0.0535, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 9.154798761609907, | |
| "grad_norm": 4.316472018355042, | |
| "learning_rate": 1.3565342559365807e-05, | |
| "loss": 0.0439, | |
| "step": 1483 | |
| }, | |
| { | |
| "epoch": 9.160990712074303, | |
| "grad_norm": 2.797056168114164, | |
| "learning_rate": 1.3507990117661968e-05, | |
| "loss": 0.0549, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 9.1671826625387, | |
| "grad_norm": 3.675743322728183, | |
| "learning_rate": 1.3450740231292352e-05, | |
| "loss": 0.0524, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 9.173374613003096, | |
| "grad_norm": 4.491453209110375, | |
| "learning_rate": 1.3393593061149224e-05, | |
| "loss": 0.0484, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 9.179566563467493, | |
| "grad_norm": 3.7689249995584087, | |
| "learning_rate": 1.3336548767836144e-05, | |
| "loss": 0.0617, | |
| "step": 1487 | |
| }, | |
| { | |
| "epoch": 9.185758513931889, | |
| "grad_norm": 3.454412071410455, | |
| "learning_rate": 1.3279607511667568e-05, | |
| "loss": 0.0571, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 9.191950464396285, | |
| "grad_norm": 3.991374634067102, | |
| "learning_rate": 1.3222769452668381e-05, | |
| "loss": 0.048, | |
| "step": 1489 | |
| }, | |
| { | |
| "epoch": 9.198142414860682, | |
| "grad_norm": 3.3810153789316537, | |
| "learning_rate": 1.3166034750573452e-05, | |
| "loss": 0.048, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 9.204334365325078, | |
| "grad_norm": 3.3604867226265998, | |
| "learning_rate": 1.3109403564827155e-05, | |
| "loss": 0.0707, | |
| "step": 1491 | |
| }, | |
| { | |
| "epoch": 9.210526315789474, | |
| "grad_norm": 3.6629173401514836, | |
| "learning_rate": 1.3052876054582968e-05, | |
| "loss": 0.0417, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 9.21671826625387, | |
| "grad_norm": 3.3365860079861114, | |
| "learning_rate": 1.2996452378703012e-05, | |
| "loss": 0.0508, | |
| "step": 1493 | |
| }, | |
| { | |
| "epoch": 9.222910216718267, | |
| "grad_norm": 4.766038401106651, | |
| "learning_rate": 1.2940132695757573e-05, | |
| "loss": 0.0498, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 9.229102167182663, | |
| "grad_norm": 3.1082638740258965, | |
| "learning_rate": 1.2883917164024722e-05, | |
| "loss": 0.055, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 9.235294117647058, | |
| "grad_norm": 3.2643471969192897, | |
| "learning_rate": 1.2827805941489769e-05, | |
| "loss": 0.0558, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 9.241486068111454, | |
| "grad_norm": 2.757421991613139, | |
| "learning_rate": 1.2771799185844913e-05, | |
| "loss": 0.0473, | |
| "step": 1497 | |
| }, | |
| { | |
| "epoch": 9.24767801857585, | |
| "grad_norm": 3.585200638599601, | |
| "learning_rate": 1.2715897054488779e-05, | |
| "loss": 0.0404, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 9.253869969040247, | |
| "grad_norm": 3.4491133898528674, | |
| "learning_rate": 1.266009970452593e-05, | |
| "loss": 0.0411, | |
| "step": 1499 | |
| }, | |
| { | |
| "epoch": 9.260061919504643, | |
| "grad_norm": 3.232214722741945, | |
| "learning_rate": 1.2604407292766479e-05, | |
| "loss": 0.0434, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 9.26625386996904, | |
| "grad_norm": 3.792874605987716, | |
| "learning_rate": 1.2548819975725624e-05, | |
| "loss": 0.0382, | |
| "step": 1501 | |
| }, | |
| { | |
| "epoch": 9.272445820433436, | |
| "grad_norm": 3.6381123169244143, | |
| "learning_rate": 1.2493337909623182e-05, | |
| "loss": 0.0416, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 9.278637770897832, | |
| "grad_norm": 3.6507679032886444, | |
| "learning_rate": 1.2437961250383207e-05, | |
| "loss": 0.0341, | |
| "step": 1503 | |
| }, | |
| { | |
| "epoch": 9.284829721362229, | |
| "grad_norm": 3.3853346480918076, | |
| "learning_rate": 1.238269015363352e-05, | |
| "loss": 0.0437, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 9.291021671826625, | |
| "grad_norm": 2.9883819106280134, | |
| "learning_rate": 1.2327524774705267e-05, | |
| "loss": 0.0437, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 9.297213622291022, | |
| "grad_norm": 2.698885880068701, | |
| "learning_rate": 1.227246526863251e-05, | |
| "loss": 0.0445, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 9.303405572755418, | |
| "grad_norm": 3.9540399861290294, | |
| "learning_rate": 1.221751179015172e-05, | |
| "loss": 0.0437, | |
| "step": 1507 | |
| }, | |
| { | |
| "epoch": 9.309597523219814, | |
| "grad_norm": 3.1667486953886526, | |
| "learning_rate": 1.2162664493701437e-05, | |
| "loss": 0.0316, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 9.31578947368421, | |
| "grad_norm": 3.8571973311075203, | |
| "learning_rate": 1.2107923533421795e-05, | |
| "loss": 0.0524, | |
| "step": 1509 | |
| }, | |
| { | |
| "epoch": 9.321981424148607, | |
| "grad_norm": 4.2698398198406675, | |
| "learning_rate": 1.205328906315406e-05, | |
| "loss": 0.0496, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 9.328173374613003, | |
| "grad_norm": 4.64163479354582, | |
| "learning_rate": 1.1998761236440248e-05, | |
| "loss": 0.0544, | |
| "step": 1511 | |
| }, | |
| { | |
| "epoch": 9.3343653250774, | |
| "grad_norm": 3.5948294289307836, | |
| "learning_rate": 1.1944340206522665e-05, | |
| "loss": 0.0397, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 9.340557275541796, | |
| "grad_norm": 4.335126173851826, | |
| "learning_rate": 1.1890026126343445e-05, | |
| "loss": 0.0419, | |
| "step": 1513 | |
| }, | |
| { | |
| "epoch": 9.346749226006192, | |
| "grad_norm": 2.7728511480148628, | |
| "learning_rate": 1.1835819148544203e-05, | |
| "loss": 0.0444, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 9.352941176470589, | |
| "grad_norm": 4.46426094098098, | |
| "learning_rate": 1.1781719425465537e-05, | |
| "loss": 0.0398, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 9.359133126934985, | |
| "grad_norm": 2.9224509475638274, | |
| "learning_rate": 1.172772710914663e-05, | |
| "loss": 0.0349, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 9.365325077399381, | |
| "grad_norm": 3.2074537383154773, | |
| "learning_rate": 1.1673842351324816e-05, | |
| "loss": 0.0495, | |
| "step": 1517 | |
| }, | |
| { | |
| "epoch": 9.371517027863778, | |
| "grad_norm": 3.7593720134605926, | |
| "learning_rate": 1.1620065303435123e-05, | |
| "loss": 0.045, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 9.377708978328174, | |
| "grad_norm": 3.425909943910001, | |
| "learning_rate": 1.1566396116609907e-05, | |
| "loss": 0.0359, | |
| "step": 1519 | |
| }, | |
| { | |
| "epoch": 9.38390092879257, | |
| "grad_norm": 2.5543285672297107, | |
| "learning_rate": 1.1512834941678392e-05, | |
| "loss": 0.0511, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 9.390092879256965, | |
| "grad_norm": 3.726490945738635, | |
| "learning_rate": 1.1459381929166251e-05, | |
| "loss": 0.0444, | |
| "step": 1521 | |
| }, | |
| { | |
| "epoch": 9.396284829721361, | |
| "grad_norm": 3.474670891644341, | |
| "learning_rate": 1.1406037229295168e-05, | |
| "loss": 0.0417, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 9.402476780185758, | |
| "grad_norm": 4.059480970388856, | |
| "learning_rate": 1.1352800991982466e-05, | |
| "loss": 0.0397, | |
| "step": 1523 | |
| }, | |
| { | |
| "epoch": 9.408668730650154, | |
| "grad_norm": 3.8019192883784636, | |
| "learning_rate": 1.129967336684059e-05, | |
| "loss": 0.061, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 9.41486068111455, | |
| "grad_norm": 4.264722681190781, | |
| "learning_rate": 1.1246654503176796e-05, | |
| "loss": 0.0385, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 9.421052631578947, | |
| "grad_norm": 2.805991694585358, | |
| "learning_rate": 1.1193744549992673e-05, | |
| "loss": 0.0481, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 9.427244582043343, | |
| "grad_norm": 4.895292229788722, | |
| "learning_rate": 1.1140943655983727e-05, | |
| "loss": 0.0287, | |
| "step": 1527 | |
| }, | |
| { | |
| "epoch": 9.43343653250774, | |
| "grad_norm": 2.8298220889172376, | |
| "learning_rate": 1.1088251969538971e-05, | |
| "loss": 0.0308, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 9.439628482972136, | |
| "grad_norm": 3.017727827780755, | |
| "learning_rate": 1.103566963874052e-05, | |
| "loss": 0.0414, | |
| "step": 1529 | |
| }, | |
| { | |
| "epoch": 9.445820433436532, | |
| "grad_norm": 3.134114519650563, | |
| "learning_rate": 1.0983196811363117e-05, | |
| "loss": 0.0455, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 9.452012383900929, | |
| "grad_norm": 3.853461797471628, | |
| "learning_rate": 1.093083363487381e-05, | |
| "loss": 0.0471, | |
| "step": 1531 | |
| }, | |
| { | |
| "epoch": 9.458204334365325, | |
| "grad_norm": 3.784491508037374, | |
| "learning_rate": 1.0878580256431476e-05, | |
| "loss": 0.0333, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 9.464396284829721, | |
| "grad_norm": 3.1013783605586096, | |
| "learning_rate": 1.082643682288641e-05, | |
| "loss": 0.0445, | |
| "step": 1533 | |
| }, | |
| { | |
| "epoch": 9.470588235294118, | |
| "grad_norm": 3.024386287468399, | |
| "learning_rate": 1.0774403480779954e-05, | |
| "loss": 0.0398, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 9.476780185758514, | |
| "grad_norm": 2.6304318358318546, | |
| "learning_rate": 1.0722480376343996e-05, | |
| "loss": 0.0365, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 9.48297213622291, | |
| "grad_norm": 3.6055280547773823, | |
| "learning_rate": 1.0670667655500665e-05, | |
| "loss": 0.0384, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 9.489164086687307, | |
| "grad_norm": 4.25296133447424, | |
| "learning_rate": 1.0618965463861868e-05, | |
| "loss": 0.0699, | |
| "step": 1537 | |
| }, | |
| { | |
| "epoch": 9.495356037151703, | |
| "grad_norm": 3.0854868556894988, | |
| "learning_rate": 1.0567373946728882e-05, | |
| "loss": 0.037, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 9.5015479876161, | |
| "grad_norm": 3.536570489303969, | |
| "learning_rate": 1.0515893249091935e-05, | |
| "loss": 0.0421, | |
| "step": 1539 | |
| }, | |
| { | |
| "epoch": 9.507739938080496, | |
| "grad_norm": 3.922968140133842, | |
| "learning_rate": 1.0464523515629849e-05, | |
| "loss": 0.0457, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 9.513931888544892, | |
| "grad_norm": 3.7980928274918324, | |
| "learning_rate": 1.0413264890709546e-05, | |
| "loss": 0.041, | |
| "step": 1541 | |
| }, | |
| { | |
| "epoch": 9.520123839009289, | |
| "grad_norm": 2.9689972166106764, | |
| "learning_rate": 1.0362117518385733e-05, | |
| "loss": 0.0345, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 9.526315789473685, | |
| "grad_norm": 3.7882224991986995, | |
| "learning_rate": 1.0311081542400453e-05, | |
| "loss": 0.0503, | |
| "step": 1543 | |
| }, | |
| { | |
| "epoch": 9.532507739938081, | |
| "grad_norm": 3.4973928447710425, | |
| "learning_rate": 1.0260157106182672e-05, | |
| "loss": 0.0426, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 9.538699690402478, | |
| "grad_norm": 4.1842225938523585, | |
| "learning_rate": 1.0209344352847921e-05, | |
| "loss": 0.0336, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 9.544891640866872, | |
| "grad_norm": 2.9611668772280684, | |
| "learning_rate": 1.0158643425197818e-05, | |
| "loss": 0.0361, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 9.551083591331269, | |
| "grad_norm": 2.6698421025685297, | |
| "learning_rate": 1.0108054465719736e-05, | |
| "loss": 0.0326, | |
| "step": 1547 | |
| }, | |
| { | |
| "epoch": 9.557275541795665, | |
| "grad_norm": 2.907941122419673, | |
| "learning_rate": 1.0057577616586384e-05, | |
| "loss": 0.0475, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 9.563467492260061, | |
| "grad_norm": 3.3638184639805364, | |
| "learning_rate": 1.0007213019655393e-05, | |
| "loss": 0.0376, | |
| "step": 1549 | |
| }, | |
| { | |
| "epoch": 9.569659442724458, | |
| "grad_norm": 4.1309601156713605, | |
| "learning_rate": 9.95696081646893e-06, | |
| "loss": 0.0369, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 9.575851393188854, | |
| "grad_norm": 3.003076302057863, | |
| "learning_rate": 9.906821148253304e-06, | |
| "loss": 0.0372, | |
| "step": 1551 | |
| }, | |
| { | |
| "epoch": 9.58204334365325, | |
| "grad_norm": 2.860865564137365, | |
| "learning_rate": 9.856794155918525e-06, | |
| "loss": 0.0398, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 9.588235294117647, | |
| "grad_norm": 3.4619849919989623, | |
| "learning_rate": 9.806879980057993e-06, | |
| "loss": 0.035, | |
| "step": 1553 | |
| }, | |
| { | |
| "epoch": 9.594427244582043, | |
| "grad_norm": 2.6605517772057286, | |
| "learning_rate": 9.757078760948013e-06, | |
| "loss": 0.036, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 9.60061919504644, | |
| "grad_norm": 2.8748160828272398, | |
| "learning_rate": 9.70739063854748e-06, | |
| "loss": 0.0419, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 9.606811145510836, | |
| "grad_norm": 3.993546683123117, | |
| "learning_rate": 9.657815752497428e-06, | |
| "loss": 0.0462, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 9.613003095975232, | |
| "grad_norm": 3.6620329861297436, | |
| "learning_rate": 9.608354242120638e-06, | |
| "loss": 0.0473, | |
| "step": 1557 | |
| }, | |
| { | |
| "epoch": 9.619195046439629, | |
| "grad_norm": 3.6542556245789153, | |
| "learning_rate": 9.559006246421292e-06, | |
| "loss": 0.0313, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 9.625386996904025, | |
| "grad_norm": 3.488855274052928, | |
| "learning_rate": 9.509771904084557e-06, | |
| "loss": 0.0452, | |
| "step": 1559 | |
| }, | |
| { | |
| "epoch": 9.631578947368421, | |
| "grad_norm": 2.707239777436621, | |
| "learning_rate": 9.46065135347618e-06, | |
| "loss": 0.0332, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 9.637770897832818, | |
| "grad_norm": 3.463158996741532, | |
| "learning_rate": 9.411644732642122e-06, | |
| "loss": 0.0356, | |
| "step": 1561 | |
| }, | |
| { | |
| "epoch": 9.643962848297214, | |
| "grad_norm": 2.950857156541241, | |
| "learning_rate": 9.362752179308176e-06, | |
| "loss": 0.0332, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 9.65015479876161, | |
| "grad_norm": 6.4610121276298855, | |
| "learning_rate": 9.313973830879513e-06, | |
| "loss": 0.0584, | |
| "step": 1563 | |
| }, | |
| { | |
| "epoch": 9.656346749226007, | |
| "grad_norm": 3.783311982771598, | |
| "learning_rate": 9.265309824440404e-06, | |
| "loss": 0.0383, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 9.662538699690403, | |
| "grad_norm": 2.8594458661572455, | |
| "learning_rate": 9.216760296753756e-06, | |
| "loss": 0.0352, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 9.6687306501548, | |
| "grad_norm": 2.568500500523965, | |
| "learning_rate": 9.16832538426075e-06, | |
| "loss": 0.0321, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 9.674922600619196, | |
| "grad_norm": 3.742127090672312, | |
| "learning_rate": 9.120005223080485e-06, | |
| "loss": 0.0467, | |
| "step": 1567 | |
| }, | |
| { | |
| "epoch": 9.681114551083592, | |
| "grad_norm": 4.219351190704549, | |
| "learning_rate": 9.071799949009508e-06, | |
| "loss": 0.0365, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 9.687306501547988, | |
| "grad_norm": 3.1755991636644545, | |
| "learning_rate": 9.023709697521543e-06, | |
| "loss": 0.0407, | |
| "step": 1569 | |
| }, | |
| { | |
| "epoch": 9.693498452012385, | |
| "grad_norm": 3.635230157595945, | |
| "learning_rate": 8.975734603767056e-06, | |
| "loss": 0.0402, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 9.69969040247678, | |
| "grad_norm": 3.78300603635811, | |
| "learning_rate": 8.92787480257286e-06, | |
| "loss": 0.0414, | |
| "step": 1571 | |
| }, | |
| { | |
| "epoch": 9.705882352941176, | |
| "grad_norm": 3.365315495479003, | |
| "learning_rate": 8.880130428441774e-06, | |
| "loss": 0.039, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 9.712074303405572, | |
| "grad_norm": 3.974574854772795, | |
| "learning_rate": 8.832501615552224e-06, | |
| "loss": 0.0472, | |
| "step": 1573 | |
| }, | |
| { | |
| "epoch": 9.718266253869968, | |
| "grad_norm": 3.452851500943343, | |
| "learning_rate": 8.784988497757846e-06, | |
| "loss": 0.0348, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 9.724458204334365, | |
| "grad_norm": 3.062904518542008, | |
| "learning_rate": 8.737591208587158e-06, | |
| "loss": 0.0325, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 9.730650154798761, | |
| "grad_norm": 3.6314811791043593, | |
| "learning_rate": 8.690309881243148e-06, | |
| "loss": 0.0418, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 9.736842105263158, | |
| "grad_norm": 4.1744000045735214, | |
| "learning_rate": 8.643144648602913e-06, | |
| "loss": 0.0375, | |
| "step": 1577 | |
| }, | |
| { | |
| "epoch": 9.743034055727554, | |
| "grad_norm": 3.256942957126009, | |
| "learning_rate": 8.5960956432173e-06, | |
| "loss": 0.0492, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 9.74922600619195, | |
| "grad_norm": 4.165939797178315, | |
| "learning_rate": 8.549162997310467e-06, | |
| "loss": 0.0445, | |
| "step": 1579 | |
| }, | |
| { | |
| "epoch": 9.755417956656347, | |
| "grad_norm": 3.0519982083557933, | |
| "learning_rate": 8.502346842779618e-06, | |
| "loss": 0.0376, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 9.761609907120743, | |
| "grad_norm": 3.760682337684378, | |
| "learning_rate": 8.455647311194537e-06, | |
| "loss": 0.032, | |
| "step": 1581 | |
| }, | |
| { | |
| "epoch": 9.76780185758514, | |
| "grad_norm": 2.756832052460076, | |
| "learning_rate": 8.409064533797284e-06, | |
| "loss": 0.0389, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 9.773993808049536, | |
| "grad_norm": 4.340348325659682, | |
| "learning_rate": 8.362598641501773e-06, | |
| "loss": 0.0337, | |
| "step": 1583 | |
| }, | |
| { | |
| "epoch": 9.780185758513932, | |
| "grad_norm": 2.7303960940322756, | |
| "learning_rate": 8.31624976489347e-06, | |
| "loss": 0.0492, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 9.786377708978328, | |
| "grad_norm": 4.578121431627828, | |
| "learning_rate": 8.270018034228916e-06, | |
| "loss": 0.0389, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 9.792569659442725, | |
| "grad_norm": 3.1414915838212494, | |
| "learning_rate": 8.223903579435499e-06, | |
| "loss": 0.0302, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 9.798761609907121, | |
| "grad_norm": 3.773023692273092, | |
| "learning_rate": 8.177906530110996e-06, | |
| "loss": 0.0395, | |
| "step": 1587 | |
| }, | |
| { | |
| "epoch": 9.804953560371517, | |
| "grad_norm": 2.8811396061099033, | |
| "learning_rate": 8.132027015523219e-06, | |
| "loss": 0.0371, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 9.811145510835914, | |
| "grad_norm": 2.643361725427002, | |
| "learning_rate": 8.086265164609708e-06, | |
| "loss": 0.0426, | |
| "step": 1589 | |
| }, | |
| { | |
| "epoch": 9.81733746130031, | |
| "grad_norm": 3.7025529170637914, | |
| "learning_rate": 8.040621105977264e-06, | |
| "loss": 0.0373, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 9.823529411764707, | |
| "grad_norm": 3.6884225848392185, | |
| "learning_rate": 7.9950949679017e-06, | |
| "loss": 0.0386, | |
| "step": 1591 | |
| }, | |
| { | |
| "epoch": 9.829721362229103, | |
| "grad_norm": 3.3916461921735164, | |
| "learning_rate": 7.949686878327428e-06, | |
| "loss": 0.0324, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 9.8359133126935, | |
| "grad_norm": 2.8304230940358, | |
| "learning_rate": 7.90439696486705e-06, | |
| "loss": 0.0422, | |
| "step": 1593 | |
| }, | |
| { | |
| "epoch": 9.842105263157894, | |
| "grad_norm": 3.0651290512485727, | |
| "learning_rate": 7.859225354801137e-06, | |
| "loss": 0.0368, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 9.848297213622292, | |
| "grad_norm": 4.475021970118916, | |
| "learning_rate": 7.814172175077738e-06, | |
| "loss": 0.0456, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 9.854489164086687, | |
| "grad_norm": 3.215831651434377, | |
| "learning_rate": 7.769237552312048e-06, | |
| "loss": 0.0339, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 9.860681114551083, | |
| "grad_norm": 4.138337590415728, | |
| "learning_rate": 7.724421612786109e-06, | |
| "loss": 0.0308, | |
| "step": 1597 | |
| }, | |
| { | |
| "epoch": 9.86687306501548, | |
| "grad_norm": 2.4632014500120865, | |
| "learning_rate": 7.679724482448424e-06, | |
| "loss": 0.0305, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 9.873065015479876, | |
| "grad_norm": 2.6731726515248675, | |
| "learning_rate": 7.635146286913586e-06, | |
| "loss": 0.0539, | |
| "step": 1599 | |
| }, | |
| { | |
| "epoch": 9.879256965944272, | |
| "grad_norm": 4.600988512774891, | |
| "learning_rate": 7.59068715146195e-06, | |
| "loss": 0.0279, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 9.885448916408668, | |
| "grad_norm": 3.5572086266550556, | |
| "learning_rate": 7.546347201039255e-06, | |
| "loss": 0.041, | |
| "step": 1601 | |
| }, | |
| { | |
| "epoch": 9.891640866873065, | |
| "grad_norm": 3.0537265767069863, | |
| "learning_rate": 7.502126560256301e-06, | |
| "loss": 0.0459, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 9.897832817337461, | |
| "grad_norm": 3.5419809775843833, | |
| "learning_rate": 7.458025353388593e-06, | |
| "loss": 0.0334, | |
| "step": 1603 | |
| }, | |
| { | |
| "epoch": 9.904024767801857, | |
| "grad_norm": 3.414732524788787, | |
| "learning_rate": 7.414043704375944e-06, | |
| "loss": 0.0489, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 9.910216718266254, | |
| "grad_norm": 3.3256856595567967, | |
| "learning_rate": 7.370181736822229e-06, | |
| "loss": 0.0363, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 9.91640866873065, | |
| "grad_norm": 3.895158991978571, | |
| "learning_rate": 7.326439573994953e-06, | |
| "loss": 0.0327, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 9.922600619195046, | |
| "grad_norm": 2.6763490690043152, | |
| "learning_rate": 7.282817338824893e-06, | |
| "loss": 0.0314, | |
| "step": 1607 | |
| }, | |
| { | |
| "epoch": 9.928792569659443, | |
| "grad_norm": 2.958990660663684, | |
| "learning_rate": 7.2393151539058235e-06, | |
| "loss": 0.0327, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 9.93498452012384, | |
| "grad_norm": 3.6738392405654303, | |
| "learning_rate": 7.195933141494133e-06, | |
| "loss": 0.0367, | |
| "step": 1609 | |
| }, | |
| { | |
| "epoch": 9.941176470588236, | |
| "grad_norm": 2.6964217016336183, | |
| "learning_rate": 7.1526714235084725e-06, | |
| "loss": 0.0328, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 9.947368421052632, | |
| "grad_norm": 4.1020842351654085, | |
| "learning_rate": 7.109530121529439e-06, | |
| "loss": 0.0357, | |
| "step": 1611 | |
| }, | |
| { | |
| "epoch": 9.953560371517028, | |
| "grad_norm": 4.013825911292633, | |
| "learning_rate": 7.066509356799189e-06, | |
| "loss": 0.045, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 9.959752321981425, | |
| "grad_norm": 2.890908035100116, | |
| "learning_rate": 7.023609250221153e-06, | |
| "loss": 0.0383, | |
| "step": 1613 | |
| }, | |
| { | |
| "epoch": 9.965944272445821, | |
| "grad_norm": 2.866464736264314, | |
| "learning_rate": 6.980829922359666e-06, | |
| "loss": 0.0316, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 9.972136222910217, | |
| "grad_norm": 3.532104979673028, | |
| "learning_rate": 6.938171493439622e-06, | |
| "loss": 0.0321, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 9.978328173374614, | |
| "grad_norm": 2.4739025667718315, | |
| "learning_rate": 6.895634083346158e-06, | |
| "loss": 0.0333, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 9.98452012383901, | |
| "grad_norm": 2.686121620365639, | |
| "learning_rate": 6.853217811624313e-06, | |
| "loss": 0.0325, | |
| "step": 1617 | |
| }, | |
| { | |
| "epoch": 9.990712074303406, | |
| "grad_norm": 3.1692020283541273, | |
| "learning_rate": 6.810922797478653e-06, | |
| "loss": 0.0441, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 9.996904024767801, | |
| "grad_norm": 3.084011355069829, | |
| "learning_rate": 6.7687491597729915e-06, | |
| "loss": 0.0358, | |
| "step": 1619 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 3.492275686041303, | |
| "learning_rate": 6.7266970170300315e-06, | |
| "loss": 0.0115, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 10.006191950464396, | |
| "grad_norm": 0.9690041621709453, | |
| "learning_rate": 6.684766487431027e-06, | |
| "loss": 0.0136, | |
| "step": 1621 | |
| }, | |
| { | |
| "epoch": 10.012383900928793, | |
| "grad_norm": 0.6941026113485895, | |
| "learning_rate": 6.642957688815476e-06, | |
| "loss": 0.0105, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 10.018575851393189, | |
| "grad_norm": 1.6892873132285695, | |
| "learning_rate": 6.601270738680721e-06, | |
| "loss": 0.0132, | |
| "step": 1623 | |
| }, | |
| { | |
| "epoch": 10.024767801857585, | |
| "grad_norm": 0.7795458575261905, | |
| "learning_rate": 6.559705754181711e-06, | |
| "loss": 0.0117, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 10.030959752321982, | |
| "grad_norm": 1.816508512253383, | |
| "learning_rate": 6.518262852130625e-06, | |
| "loss": 0.0168, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 10.037151702786378, | |
| "grad_norm": 1.1419893923025035, | |
| "learning_rate": 6.476942148996529e-06, | |
| "loss": 0.0132, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 10.043343653250774, | |
| "grad_norm": 0.8090659548366029, | |
| "learning_rate": 6.435743760905083e-06, | |
| "loss": 0.0095, | |
| "step": 1627 | |
| }, | |
| { | |
| "epoch": 10.04953560371517, | |
| "grad_norm": 0.619249601903476, | |
| "learning_rate": 6.394667803638199e-06, | |
| "loss": 0.0093, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 10.055727554179567, | |
| "grad_norm": 0.7657768966180624, | |
| "learning_rate": 6.353714392633697e-06, | |
| "loss": 0.0104, | |
| "step": 1629 | |
| }, | |
| { | |
| "epoch": 10.061919504643964, | |
| "grad_norm": 0.8324925619707857, | |
| "learning_rate": 6.312883642985013e-06, | |
| "loss": 0.0117, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 10.06811145510836, | |
| "grad_norm": 1.3470052156475658, | |
| "learning_rate": 6.272175669440861e-06, | |
| "loss": 0.0119, | |
| "step": 1631 | |
| }, | |
| { | |
| "epoch": 10.074303405572756, | |
| "grad_norm": 1.1332209729806688, | |
| "learning_rate": 6.2315905864049175e-06, | |
| "loss": 0.011, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 10.08049535603715, | |
| "grad_norm": 0.8623305294648557, | |
| "learning_rate": 6.1911285079354785e-06, | |
| "loss": 0.01, | |
| "step": 1633 | |
| }, | |
| { | |
| "epoch": 10.086687306501547, | |
| "grad_norm": 1.9984087842934972, | |
| "learning_rate": 6.150789547745178e-06, | |
| "loss": 0.0137, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 10.092879256965944, | |
| "grad_norm": 0.797048668269128, | |
| "learning_rate": 6.110573819200605e-06, | |
| "loss": 0.0123, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 10.09907120743034, | |
| "grad_norm": 1.6465166602549526, | |
| "learning_rate": 6.070481435322062e-06, | |
| "loss": 0.0174, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 10.105263157894736, | |
| "grad_norm": 0.9915252884551151, | |
| "learning_rate": 6.030512508783187e-06, | |
| "loss": 0.0117, | |
| "step": 1637 | |
| }, | |
| { | |
| "epoch": 10.111455108359133, | |
| "grad_norm": 0.8757703472508664, | |
| "learning_rate": 5.990667151910678e-06, | |
| "loss": 0.0123, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 10.117647058823529, | |
| "grad_norm": 0.6976670441674919, | |
| "learning_rate": 5.950945476683956e-06, | |
| "loss": 0.0102, | |
| "step": 1639 | |
| }, | |
| { | |
| "epoch": 10.123839009287925, | |
| "grad_norm": 1.671517191093078, | |
| "learning_rate": 5.911347594734823e-06, | |
| "loss": 0.0139, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 10.130030959752322, | |
| "grad_norm": 1.9299239166858515, | |
| "learning_rate": 5.871873617347217e-06, | |
| "loss": 0.0133, | |
| "step": 1641 | |
| }, | |
| { | |
| "epoch": 10.136222910216718, | |
| "grad_norm": 1.2988206835620129, | |
| "learning_rate": 5.832523655456845e-06, | |
| "loss": 0.0153, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 10.142414860681114, | |
| "grad_norm": 1.4569247038428583, | |
| "learning_rate": 5.793297819650884e-06, | |
| "loss": 0.0127, | |
| "step": 1643 | |
| }, | |
| { | |
| "epoch": 10.14860681114551, | |
| "grad_norm": 0.7816795380471435, | |
| "learning_rate": 5.754196220167679e-06, | |
| "loss": 0.0098, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 10.154798761609907, | |
| "grad_norm": 1.3591968793264988, | |
| "learning_rate": 5.715218966896435e-06, | |
| "loss": 0.0133, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 10.160990712074303, | |
| "grad_norm": 0.6915571090654116, | |
| "learning_rate": 5.676366169376873e-06, | |
| "loss": 0.0109, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 10.1671826625387, | |
| "grad_norm": 1.1901382752213034, | |
| "learning_rate": 5.637637936798979e-06, | |
| "loss": 0.0105, | |
| "step": 1647 | |
| }, | |
| { | |
| "epoch": 10.173374613003096, | |
| "grad_norm": 0.697074801951405, | |
| "learning_rate": 5.599034378002649e-06, | |
| "loss": 0.0105, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 10.179566563467493, | |
| "grad_norm": 1.5164025066944178, | |
| "learning_rate": 5.560555601477418e-06, | |
| "loss": 0.014, | |
| "step": 1649 | |
| }, | |
| { | |
| "epoch": 10.185758513931889, | |
| "grad_norm": 0.9434268887247322, | |
| "learning_rate": 5.522201715362135e-06, | |
| "loss": 0.0092, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 10.191950464396285, | |
| "grad_norm": 0.851637419723935, | |
| "learning_rate": 5.483972827444644e-06, | |
| "loss": 0.0101, | |
| "step": 1651 | |
| }, | |
| { | |
| "epoch": 10.198142414860682, | |
| "grad_norm": 0.5702932276533784, | |
| "learning_rate": 5.4458690451615225e-06, | |
| "loss": 0.0073, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 10.204334365325078, | |
| "grad_norm": 1.0316361049272975, | |
| "learning_rate": 5.40789047559776e-06, | |
| "loss": 0.0104, | |
| "step": 1653 | |
| }, | |
| { | |
| "epoch": 10.210526315789474, | |
| "grad_norm": 1.5701282669378862, | |
| "learning_rate": 5.370037225486413e-06, | |
| "loss": 0.0138, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 10.21671826625387, | |
| "grad_norm": 1.614278502654183, | |
| "learning_rate": 5.332309401208408e-06, | |
| "loss": 0.0109, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 10.222910216718267, | |
| "grad_norm": 1.5812343173850434, | |
| "learning_rate": 5.294707108792146e-06, | |
| "loss": 0.0115, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 10.229102167182663, | |
| "grad_norm": 0.7741625900478416, | |
| "learning_rate": 5.257230453913237e-06, | |
| "loss": 0.0087, | |
| "step": 1657 | |
| }, | |
| { | |
| "epoch": 10.235294117647058, | |
| "grad_norm": 0.8181768770881074, | |
| "learning_rate": 5.219879541894213e-06, | |
| "loss": 0.0114, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 10.241486068111454, | |
| "grad_norm": 0.5613606941784166, | |
| "learning_rate": 5.182654477704229e-06, | |
| "loss": 0.0068, | |
| "step": 1659 | |
| }, | |
| { | |
| "epoch": 10.24767801857585, | |
| "grad_norm": 1.4465625168944913, | |
| "learning_rate": 5.145555365958754e-06, | |
| "loss": 0.0129, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 10.253869969040247, | |
| "grad_norm": 1.264523084436169, | |
| "learning_rate": 5.108582310919302e-06, | |
| "loss": 0.0115, | |
| "step": 1661 | |
| }, | |
| { | |
| "epoch": 10.260061919504643, | |
| "grad_norm": 1.5695424386876935, | |
| "learning_rate": 5.071735416493095e-06, | |
| "loss": 0.014, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 10.26625386996904, | |
| "grad_norm": 1.5234094429714513, | |
| "learning_rate": 5.0350147862328275e-06, | |
| "loss": 0.0118, | |
| "step": 1663 | |
| }, | |
| { | |
| "epoch": 10.272445820433436, | |
| "grad_norm": 1.1168371246093773, | |
| "learning_rate": 4.998420523336344e-06, | |
| "loss": 0.0081, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 10.278637770897832, | |
| "grad_norm": 0.7440344852931872, | |
| "learning_rate": 4.961952730646319e-06, | |
| "loss": 0.0086, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 10.284829721362229, | |
| "grad_norm": 1.8493349338190865, | |
| "learning_rate": 4.9256115106500575e-06, | |
| "loss": 0.0149, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 10.291021671826625, | |
| "grad_norm": 1.3877650925456666, | |
| "learning_rate": 4.889396965479115e-06, | |
| "loss": 0.0114, | |
| "step": 1667 | |
| }, | |
| { | |
| "epoch": 10.297213622291022, | |
| "grad_norm": 0.7281950423300454, | |
| "learning_rate": 4.853309196909045e-06, | |
| "loss": 0.0096, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 10.303405572755418, | |
| "grad_norm": 2.8181618879647354, | |
| "learning_rate": 4.817348306359121e-06, | |
| "loss": 0.0123, | |
| "step": 1669 | |
| }, | |
| { | |
| "epoch": 10.309597523219814, | |
| "grad_norm": 0.5273928142352734, | |
| "learning_rate": 4.78151439489205e-06, | |
| "loss": 0.0075, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 10.31578947368421, | |
| "grad_norm": 1.2995849665706596, | |
| "learning_rate": 4.7458075632136776e-06, | |
| "loss": 0.0116, | |
| "step": 1671 | |
| }, | |
| { | |
| "epoch": 10.321981424148607, | |
| "grad_norm": 1.5898543976941963, | |
| "learning_rate": 4.710227911672721e-06, | |
| "loss": 0.013, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 10.328173374613003, | |
| "grad_norm": 1.3417056303557735, | |
| "learning_rate": 4.674775540260456e-06, | |
| "loss": 0.0103, | |
| "step": 1673 | |
| }, | |
| { | |
| "epoch": 10.3343653250774, | |
| "grad_norm": 1.4685272029621523, | |
| "learning_rate": 4.6394505486104685e-06, | |
| "loss": 0.0131, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 10.340557275541796, | |
| "grad_norm": 0.8277822917699998, | |
| "learning_rate": 4.604253035998379e-06, | |
| "loss": 0.0116, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 10.346749226006192, | |
| "grad_norm": 1.1121863929552394, | |
| "learning_rate": 4.569183101341501e-06, | |
| "loss": 0.0114, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 10.352941176470589, | |
| "grad_norm": 0.677948454774533, | |
| "learning_rate": 4.534240843198662e-06, | |
| "loss": 0.0091, | |
| "step": 1677 | |
| }, | |
| { | |
| "epoch": 10.359133126934985, | |
| "grad_norm": 0.8533445134270156, | |
| "learning_rate": 4.499426359769859e-06, | |
| "loss": 0.0095, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 10.365325077399381, | |
| "grad_norm": 1.0518039551642777, | |
| "learning_rate": 4.464739748895963e-06, | |
| "loss": 0.0104, | |
| "step": 1679 | |
| }, | |
| { | |
| "epoch": 10.371517027863778, | |
| "grad_norm": 1.4222729965189793, | |
| "learning_rate": 4.430181108058517e-06, | |
| "loss": 0.0116, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 10.377708978328174, | |
| "grad_norm": 0.7831784087988597, | |
| "learning_rate": 4.395750534379411e-06, | |
| "loss": 0.0088, | |
| "step": 1681 | |
| }, | |
| { | |
| "epoch": 10.38390092879257, | |
| "grad_norm": 0.7708963964507719, | |
| "learning_rate": 4.361448124620621e-06, | |
| "loss": 0.0097, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 10.390092879256965, | |
| "grad_norm": 0.8954947088275224, | |
| "learning_rate": 4.327273975183949e-06, | |
| "loss": 0.0112, | |
| "step": 1683 | |
| }, | |
| { | |
| "epoch": 10.396284829721361, | |
| "grad_norm": 1.8242329830813873, | |
| "learning_rate": 4.293228182110704e-06, | |
| "loss": 0.0099, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 10.402476780185758, | |
| "grad_norm": 3.6333512677727655, | |
| "learning_rate": 4.259310841081515e-06, | |
| "loss": 0.0204, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 10.408668730650154, | |
| "grad_norm": 1.892780654876686, | |
| "learning_rate": 4.225522047415992e-06, | |
| "loss": 0.0189, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 10.41486068111455, | |
| "grad_norm": 1.7700319894140126, | |
| "learning_rate": 4.191861896072457e-06, | |
| "loss": 0.0118, | |
| "step": 1687 | |
| }, | |
| { | |
| "epoch": 10.421052631578947, | |
| "grad_norm": 0.9200528055389516, | |
| "learning_rate": 4.1583304816477706e-06, | |
| "loss": 0.0125, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 10.427244582043343, | |
| "grad_norm": 0.7789740816150398, | |
| "learning_rate": 4.12492789837694e-06, | |
| "loss": 0.0109, | |
| "step": 1689 | |
| }, | |
| { | |
| "epoch": 10.43343653250774, | |
| "grad_norm": 0.783564421561771, | |
| "learning_rate": 4.091654240132925e-06, | |
| "loss": 0.0116, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 10.439628482972136, | |
| "grad_norm": 0.6118045323063145, | |
| "learning_rate": 4.058509600426358e-06, | |
| "loss": 0.0077, | |
| "step": 1691 | |
| }, | |
| { | |
| "epoch": 10.445820433436532, | |
| "grad_norm": 0.7260828388384113, | |
| "learning_rate": 4.0254940724053005e-06, | |
| "loss": 0.0093, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 10.452012383900929, | |
| "grad_norm": 2.228638075154875, | |
| "learning_rate": 3.992607748854954e-06, | |
| "loss": 0.0181, | |
| "step": 1693 | |
| }, | |
| { | |
| "epoch": 10.458204334365325, | |
| "grad_norm": 1.0717379632301736, | |
| "learning_rate": 3.959850722197411e-06, | |
| "loss": 0.0098, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 10.464396284829721, | |
| "grad_norm": 1.3420482541310115, | |
| "learning_rate": 3.927223084491388e-06, | |
| "loss": 0.0117, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 10.470588235294118, | |
| "grad_norm": 0.6950594630885271, | |
| "learning_rate": 3.8947249274319805e-06, | |
| "loss": 0.0076, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 10.476780185758514, | |
| "grad_norm": 1.702546172738537, | |
| "learning_rate": 3.86235634235041e-06, | |
| "loss": 0.0126, | |
| "step": 1697 | |
| }, | |
| { | |
| "epoch": 10.48297213622291, | |
| "grad_norm": 0.9719819269381079, | |
| "learning_rate": 3.830117420213713e-06, | |
| "loss": 0.0089, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 10.489164086687307, | |
| "grad_norm": 1.4004291750343034, | |
| "learning_rate": 3.798008251624585e-06, | |
| "loss": 0.0132, | |
| "step": 1699 | |
| }, | |
| { | |
| "epoch": 10.495356037151703, | |
| "grad_norm": 0.7473199645427587, | |
| "learning_rate": 3.7660289268210415e-06, | |
| "loss": 0.008, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 10.5015479876161, | |
| "grad_norm": 0.9275640876435634, | |
| "learning_rate": 3.7341795356761688e-06, | |
| "loss": 0.0091, | |
| "step": 1701 | |
| }, | |
| { | |
| "epoch": 10.507739938080496, | |
| "grad_norm": 0.6574915351770422, | |
| "learning_rate": 3.70246016769793e-06, | |
| "loss": 0.0079, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 10.513931888544892, | |
| "grad_norm": 0.7561543389827127, | |
| "learning_rate": 3.6708709120288565e-06, | |
| "loss": 0.0087, | |
| "step": 1703 | |
| }, | |
| { | |
| "epoch": 10.520123839009289, | |
| "grad_norm": 0.9256024134932721, | |
| "learning_rate": 3.639411857445829e-06, | |
| "loss": 0.012, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 10.526315789473685, | |
| "grad_norm": 1.3251515213309393, | |
| "learning_rate": 3.6080830923598263e-06, | |
| "loss": 0.0106, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 10.532507739938081, | |
| "grad_norm": 0.804076898474573, | |
| "learning_rate": 3.5768847048156305e-06, | |
| "loss": 0.0076, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 10.538699690402478, | |
| "grad_norm": 0.5449255097242203, | |
| "learning_rate": 3.5458167824916566e-06, | |
| "loss": 0.0076, | |
| "step": 1707 | |
| }, | |
| { | |
| "epoch": 10.544891640866872, | |
| "grad_norm": 0.8837706768356558, | |
| "learning_rate": 3.5148794126996563e-06, | |
| "loss": 0.0112, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 10.551083591331269, | |
| "grad_norm": 0.7222977773251738, | |
| "learning_rate": 3.484072682384465e-06, | |
| "loss": 0.0085, | |
| "step": 1709 | |
| }, | |
| { | |
| "epoch": 10.557275541795665, | |
| "grad_norm": 1.0329452823258398, | |
| "learning_rate": 3.4533966781237992e-06, | |
| "loss": 0.0118, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 10.563467492260061, | |
| "grad_norm": 0.9527789813475251, | |
| "learning_rate": 3.422851486127987e-06, | |
| "loss": 0.01, | |
| "step": 1711 | |
| }, | |
| { | |
| "epoch": 10.569659442724458, | |
| "grad_norm": 0.8268111323007847, | |
| "learning_rate": 3.3924371922397003e-06, | |
| "loss": 0.0112, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 10.575851393188854, | |
| "grad_norm": 0.6727829069778749, | |
| "learning_rate": 3.3621538819337772e-06, | |
| "loss": 0.0102, | |
| "step": 1713 | |
| }, | |
| { | |
| "epoch": 10.58204334365325, | |
| "grad_norm": 0.6973365017429423, | |
| "learning_rate": 3.332001640316923e-06, | |
| "loss": 0.0085, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 10.588235294117647, | |
| "grad_norm": 1.2335275863619866, | |
| "learning_rate": 3.3019805521275095e-06, | |
| "loss": 0.0119, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 10.594427244582043, | |
| "grad_norm": 0.8442287627038453, | |
| "learning_rate": 3.272090701735314e-06, | |
| "loss": 0.0109, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 10.60061919504644, | |
| "grad_norm": 0.7037863282128368, | |
| "learning_rate": 3.242332173141277e-06, | |
| "loss": 0.0077, | |
| "step": 1717 | |
| }, | |
| { | |
| "epoch": 10.606811145510836, | |
| "grad_norm": 2.0129171525647354, | |
| "learning_rate": 3.212705049977299e-06, | |
| "loss": 0.0129, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 10.613003095975232, | |
| "grad_norm": 1.6794016066259494, | |
| "learning_rate": 3.183209415505978e-06, | |
| "loss": 0.0158, | |
| "step": 1719 | |
| }, | |
| { | |
| "epoch": 10.619195046439629, | |
| "grad_norm": 0.8047045412104622, | |
| "learning_rate": 3.153845352620377e-06, | |
| "loss": 0.0098, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 10.625386996904025, | |
| "grad_norm": 0.8135051004087362, | |
| "learning_rate": 3.1246129438438076e-06, | |
| "loss": 0.01, | |
| "step": 1721 | |
| }, | |
| { | |
| "epoch": 10.631578947368421, | |
| "grad_norm": 1.169581392676557, | |
| "learning_rate": 3.095512271329587e-06, | |
| "loss": 0.0099, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 10.637770897832818, | |
| "grad_norm": 0.7159938157722547, | |
| "learning_rate": 3.0665434168607842e-06, | |
| "loss": 0.0084, | |
| "step": 1723 | |
| }, | |
| { | |
| "epoch": 10.643962848297214, | |
| "grad_norm": 1.5038994222527753, | |
| "learning_rate": 3.0377064618500426e-06, | |
| "loss": 0.0123, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 10.65015479876161, | |
| "grad_norm": 0.9341949537858699, | |
| "learning_rate": 3.009001487339308e-06, | |
| "loss": 0.0094, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 10.656346749226007, | |
| "grad_norm": 1.059506028399342, | |
| "learning_rate": 2.9804285739996053e-06, | |
| "loss": 0.0091, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 10.662538699690403, | |
| "grad_norm": 0.4376975526720586, | |
| "learning_rate": 2.9519878021308622e-06, | |
| "loss": 0.0059, | |
| "step": 1727 | |
| }, | |
| { | |
| "epoch": 10.6687306501548, | |
| "grad_norm": 0.5828319926747679, | |
| "learning_rate": 2.9236792516615817e-06, | |
| "loss": 0.0077, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 10.674922600619196, | |
| "grad_norm": 0.7758125938963785, | |
| "learning_rate": 2.895503002148725e-06, | |
| "loss": 0.0107, | |
| "step": 1729 | |
| }, | |
| { | |
| "epoch": 10.681114551083592, | |
| "grad_norm": 0.9203794960207291, | |
| "learning_rate": 2.867459132777417e-06, | |
| "loss": 0.0103, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 10.687306501547988, | |
| "grad_norm": 0.8585940595715308, | |
| "learning_rate": 2.8395477223607693e-06, | |
| "loss": 0.0086, | |
| "step": 1731 | |
| }, | |
| { | |
| "epoch": 10.693498452012385, | |
| "grad_norm": 0.7370337401926742, | |
| "learning_rate": 2.811768849339619e-06, | |
| "loss": 0.0095, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 10.69969040247678, | |
| "grad_norm": 0.7979822392217546, | |
| "learning_rate": 2.7841225917823345e-06, | |
| "loss": 0.0094, | |
| "step": 1733 | |
| }, | |
| { | |
| "epoch": 10.705882352941176, | |
| "grad_norm": 1.0772577268099952, | |
| "learning_rate": 2.7566090273845824e-06, | |
| "loss": 0.0101, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 10.712074303405572, | |
| "grad_norm": 0.8668097368420958, | |
| "learning_rate": 2.729228233469117e-06, | |
| "loss": 0.0106, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 10.718266253869968, | |
| "grad_norm": 0.6870847423896851, | |
| "learning_rate": 2.7019802869855783e-06, | |
| "loss": 0.009, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 10.724458204334365, | |
| "grad_norm": 0.7982906139809085, | |
| "learning_rate": 2.6748652645102177e-06, | |
| "loss": 0.0112, | |
| "step": 1737 | |
| }, | |
| { | |
| "epoch": 10.730650154798761, | |
| "grad_norm": 0.7354991019101021, | |
| "learning_rate": 2.647883242245769e-06, | |
| "loss": 0.0093, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 10.736842105263158, | |
| "grad_norm": 0.9678810687189697, | |
| "learning_rate": 2.6210342960211744e-06, | |
| "loss": 0.0091, | |
| "step": 1739 | |
| }, | |
| { | |
| "epoch": 10.743034055727554, | |
| "grad_norm": 0.6660482019733218, | |
| "learning_rate": 2.5943185012913594e-06, | |
| "loss": 0.0095, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 10.74922600619195, | |
| "grad_norm": 1.06839550994224, | |
| "learning_rate": 2.567735933137083e-06, | |
| "loss": 0.0088, | |
| "step": 1741 | |
| }, | |
| { | |
| "epoch": 10.755417956656347, | |
| "grad_norm": 0.9426678760153843, | |
| "learning_rate": 2.5412866662646697e-06, | |
| "loss": 0.008, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 10.761609907120743, | |
| "grad_norm": 0.7572329345990066, | |
| "learning_rate": 2.5149707750058314e-06, | |
| "loss": 0.0093, | |
| "step": 1743 | |
| }, | |
| { | |
| "epoch": 10.76780185758514, | |
| "grad_norm": 1.1459689282860193, | |
| "learning_rate": 2.4887883333174435e-06, | |
| "loss": 0.0102, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 10.773993808049536, | |
| "grad_norm": 0.7313816351516814, | |
| "learning_rate": 2.4627394147813343e-06, | |
| "loss": 0.0073, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 10.780185758513932, | |
| "grad_norm": 1.207241558988607, | |
| "learning_rate": 2.4368240926041008e-06, | |
| "loss": 0.0087, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 10.786377708978328, | |
| "grad_norm": 0.7977655013840111, | |
| "learning_rate": 2.411042439616873e-06, | |
| "loss": 0.0078, | |
| "step": 1747 | |
| }, | |
| { | |
| "epoch": 10.792569659442725, | |
| "grad_norm": 0.890577896345556, | |
| "learning_rate": 2.3853945282751257e-06, | |
| "loss": 0.0113, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 10.798761609907121, | |
| "grad_norm": 0.8583291783885854, | |
| "learning_rate": 2.3598804306584843e-06, | |
| "loss": 0.0092, | |
| "step": 1749 | |
| }, | |
| { | |
| "epoch": 10.804953560371517, | |
| "grad_norm": 1.0913490870442912, | |
| "learning_rate": 2.334500218470509e-06, | |
| "loss": 0.0114, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 10.811145510835914, | |
| "grad_norm": 1.0439131308846614, | |
| "learning_rate": 2.309253963038477e-06, | |
| "loss": 0.0101, | |
| "step": 1751 | |
| }, | |
| { | |
| "epoch": 10.81733746130031, | |
| "grad_norm": 0.8933322010979203, | |
| "learning_rate": 2.284141735313211e-06, | |
| "loss": 0.0096, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 10.823529411764707, | |
| "grad_norm": 0.7395638861766168, | |
| "learning_rate": 2.2591636058688804e-06, | |
| "loss": 0.0084, | |
| "step": 1753 | |
| }, | |
| { | |
| "epoch": 10.829721362229103, | |
| "grad_norm": 0.9780330229569952, | |
| "learning_rate": 2.2343196449027716e-06, | |
| "loss": 0.0126, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 10.8359133126935, | |
| "grad_norm": 0.952534486696127, | |
| "learning_rate": 2.209609922235134e-06, | |
| "loss": 0.0114, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 10.842105263157894, | |
| "grad_norm": 1.0367024109289174, | |
| "learning_rate": 2.185034507308925e-06, | |
| "loss": 0.0081, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 10.848297213622292, | |
| "grad_norm": 1.4665715967848132, | |
| "learning_rate": 2.1605934691896867e-06, | |
| "loss": 0.0123, | |
| "step": 1757 | |
| }, | |
| { | |
| "epoch": 10.854489164086687, | |
| "grad_norm": 0.4968073457268509, | |
| "learning_rate": 2.136286876565302e-06, | |
| "loss": 0.0066, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 10.860681114551083, | |
| "grad_norm": 0.7006736750841475, | |
| "learning_rate": 2.1121147977457954e-06, | |
| "loss": 0.0079, | |
| "step": 1759 | |
| }, | |
| { | |
| "epoch": 10.86687306501548, | |
| "grad_norm": 0.9662755200351817, | |
| "learning_rate": 2.0880773006631935e-06, | |
| "loss": 0.0092, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 10.873065015479876, | |
| "grad_norm": 0.639567274002592, | |
| "learning_rate": 2.0641744528712925e-06, | |
| "loss": 0.0076, | |
| "step": 1761 | |
| }, | |
| { | |
| "epoch": 10.879256965944272, | |
| "grad_norm": 0.6402348827422334, | |
| "learning_rate": 2.0404063215454515e-06, | |
| "loss": 0.0063, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 10.885448916408668, | |
| "grad_norm": 1.4081204793940463, | |
| "learning_rate": 2.016772973482456e-06, | |
| "loss": 0.0123, | |
| "step": 1763 | |
| }, | |
| { | |
| "epoch": 10.891640866873065, | |
| "grad_norm": 0.6397554058823934, | |
| "learning_rate": 1.993274475100293e-06, | |
| "loss": 0.0079, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 10.897832817337461, | |
| "grad_norm": 0.8403423770212202, | |
| "learning_rate": 1.9699108924379818e-06, | |
| "loss": 0.0092, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 10.904024767801857, | |
| "grad_norm": 0.700545288483362, | |
| "learning_rate": 1.9466822911553774e-06, | |
| "loss": 0.0084, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 10.910216718266254, | |
| "grad_norm": 0.8071855492769477, | |
| "learning_rate": 1.9235887365329776e-06, | |
| "loss": 0.0078, | |
| "step": 1767 | |
| }, | |
| { | |
| "epoch": 10.91640866873065, | |
| "grad_norm": 0.9467118760989457, | |
| "learning_rate": 1.9006302934717723e-06, | |
| "loss": 0.0073, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 10.922600619195046, | |
| "grad_norm": 0.9326749378450487, | |
| "learning_rate": 1.8778070264930281e-06, | |
| "loss": 0.0115, | |
| "step": 1769 | |
| }, | |
| { | |
| "epoch": 10.928792569659443, | |
| "grad_norm": 0.9362248437621925, | |
| "learning_rate": 1.8551189997381147e-06, | |
| "loss": 0.0091, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 10.93498452012384, | |
| "grad_norm": 0.7116804801712812, | |
| "learning_rate": 1.832566276968345e-06, | |
| "loss": 0.0088, | |
| "step": 1771 | |
| }, | |
| { | |
| "epoch": 10.941176470588236, | |
| "grad_norm": 1.1736566878259078, | |
| "learning_rate": 1.8101489215647803e-06, | |
| "loss": 0.0123, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 10.947368421052632, | |
| "grad_norm": 1.3595920924818499, | |
| "learning_rate": 1.7878669965280314e-06, | |
| "loss": 0.0106, | |
| "step": 1773 | |
| }, | |
| { | |
| "epoch": 10.953560371517028, | |
| "grad_norm": 0.6260934080310849, | |
| "learning_rate": 1.7657205644781128e-06, | |
| "loss": 0.0074, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 10.959752321981425, | |
| "grad_norm": 1.1283883512215795, | |
| "learning_rate": 1.7437096876542713e-06, | |
| "loss": 0.0094, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 10.965944272445821, | |
| "grad_norm": 1.0216294157436547, | |
| "learning_rate": 1.7218344279147702e-06, | |
| "loss": 0.0098, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 10.972136222910217, | |
| "grad_norm": 1.574604863160782, | |
| "learning_rate": 1.7000948467367717e-06, | |
| "loss": 0.011, | |
| "step": 1777 | |
| }, | |
| { | |
| "epoch": 10.978328173374614, | |
| "grad_norm": 0.6374174964157301, | |
| "learning_rate": 1.678491005216093e-06, | |
| "loss": 0.0077, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 10.98452012383901, | |
| "grad_norm": 0.6088874624769877, | |
| "learning_rate": 1.657022964067112e-06, | |
| "loss": 0.007, | |
| "step": 1779 | |
| }, | |
| { | |
| "epoch": 10.990712074303406, | |
| "grad_norm": 1.0079770500653065, | |
| "learning_rate": 1.6356907836225565e-06, | |
| "loss": 0.0117, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 10.996904024767801, | |
| "grad_norm": 0.5552932658491616, | |
| "learning_rate": 1.6144945238332986e-06, | |
| "loss": 0.0066, | |
| "step": 1781 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 0.5552932658491616, | |
| "learning_rate": 1.5934342442682826e-06, | |
| "loss": 0.0035, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 11.006191950464396, | |
| "grad_norm": 0.8590230780171667, | |
| "learning_rate": 1.5725100041142692e-06, | |
| "loss": 0.0081, | |
| "step": 1783 | |
| }, | |
| { | |
| "epoch": 11.012383900928793, | |
| "grad_norm": 1.9743623604731195, | |
| "learning_rate": 1.5517218621756924e-06, | |
| "loss": 0.0084, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 11.018575851393189, | |
| "grad_norm": 0.35183110478447976, | |
| "learning_rate": 1.5310698768745247e-06, | |
| "loss": 0.0051, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 11.024767801857585, | |
| "grad_norm": 0.6042641844577623, | |
| "learning_rate": 1.5105541062500839e-06, | |
| "loss": 0.0066, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 11.030959752321982, | |
| "grad_norm": 0.5993920654526943, | |
| "learning_rate": 1.4901746079588553e-06, | |
| "loss": 0.0094, | |
| "step": 1787 | |
| }, | |
| { | |
| "epoch": 11.037151702786378, | |
| "grad_norm": 0.54204549668726, | |
| "learning_rate": 1.4699314392743969e-06, | |
| "loss": 0.0068, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 11.043343653250774, | |
| "grad_norm": 0.39963210647239444, | |
| "learning_rate": 1.4498246570870844e-06, | |
| "loss": 0.0072, | |
| "step": 1789 | |
| }, | |
| { | |
| "epoch": 11.04953560371517, | |
| "grad_norm": 0.9314148398330484, | |
| "learning_rate": 1.4298543179040337e-06, | |
| "loss": 0.0062, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 11.055727554179567, | |
| "grad_norm": 0.3696579757438963, | |
| "learning_rate": 1.4100204778488946e-06, | |
| "loss": 0.0074, | |
| "step": 1791 | |
| }, | |
| { | |
| "epoch": 11.061919504643964, | |
| "grad_norm": 0.7288361787060351, | |
| "learning_rate": 1.390323192661691e-06, | |
| "loss": 0.0087, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 11.06811145510836, | |
| "grad_norm": 0.581066884072162, | |
| "learning_rate": 1.3707625176987149e-06, | |
| "loss": 0.0069, | |
| "step": 1793 | |
| }, | |
| { | |
| "epoch": 11.074303405572756, | |
| "grad_norm": 0.5857256793638341, | |
| "learning_rate": 1.3513385079323094e-06, | |
| "loss": 0.0076, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 11.08049535603715, | |
| "grad_norm": 0.5199433056180636, | |
| "learning_rate": 1.3320512179507526e-06, | |
| "loss": 0.0064, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 11.086687306501547, | |
| "grad_norm": 0.6046228576130303, | |
| "learning_rate": 1.3129007019580752e-06, | |
| "loss": 0.0087, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 11.092879256965944, | |
| "grad_norm": 0.581548624238609, | |
| "learning_rate": 1.293887013773959e-06, | |
| "loss": 0.0078, | |
| "step": 1797 | |
| }, | |
| { | |
| "epoch": 11.09907120743034, | |
| "grad_norm": 0.5301680741343029, | |
| "learning_rate": 1.275010206833499e-06, | |
| "loss": 0.0079, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 11.105263157894736, | |
| "grad_norm": 0.42022687774794415, | |
| "learning_rate": 1.2562703341871707e-06, | |
| "loss": 0.0061, | |
| "step": 1799 | |
| }, | |
| { | |
| "epoch": 11.111455108359133, | |
| "grad_norm": 0.665077350032754, | |
| "learning_rate": 1.2376674485005568e-06, | |
| "loss": 0.0072, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 11.117647058823529, | |
| "grad_norm": 0.5579968541747495, | |
| "learning_rate": 1.2192016020542984e-06, | |
| "loss": 0.0084, | |
| "step": 1801 | |
| }, | |
| { | |
| "epoch": 11.123839009287925, | |
| "grad_norm": 0.4954904144880409, | |
| "learning_rate": 1.2008728467438946e-06, | |
| "loss": 0.0056, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 11.130030959752322, | |
| "grad_norm": 0.44858005274599605, | |
| "learning_rate": 1.1826812340795523e-06, | |
| "loss": 0.009, | |
| "step": 1803 | |
| }, | |
| { | |
| "epoch": 11.136222910216718, | |
| "grad_norm": 0.804259291040737, | |
| "learning_rate": 1.1646268151860984e-06, | |
| "loss": 0.0067, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 11.142414860681114, | |
| "grad_norm": 0.6782383183205587, | |
| "learning_rate": 1.1467096408027678e-06, | |
| "loss": 0.0064, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 11.14860681114551, | |
| "grad_norm": 0.42505724970819314, | |
| "learning_rate": 1.1289297612830985e-06, | |
| "loss": 0.0066, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 11.154798761609907, | |
| "grad_norm": 0.5140160581787362, | |
| "learning_rate": 1.1112872265947815e-06, | |
| "loss": 0.0055, | |
| "step": 1807 | |
| }, | |
| { | |
| "epoch": 11.160990712074303, | |
| "grad_norm": 0.43576435472775915, | |
| "learning_rate": 1.0937820863195225e-06, | |
| "loss": 0.007, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 11.1671826625387, | |
| "grad_norm": 0.593651067275537, | |
| "learning_rate": 1.0764143896528966e-06, | |
| "loss": 0.0084, | |
| "step": 1809 | |
| }, | |
| { | |
| "epoch": 11.173374613003096, | |
| "grad_norm": 0.4183258589178314, | |
| "learning_rate": 1.0591841854042218e-06, | |
| "loss": 0.0062, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 11.179566563467493, | |
| "grad_norm": 0.6129626912513965, | |
| "learning_rate": 1.0420915219964023e-06, | |
| "loss": 0.0056, | |
| "step": 1811 | |
| }, | |
| { | |
| "epoch": 11.185758513931889, | |
| "grad_norm": 0.7988572762895868, | |
| "learning_rate": 1.0251364474658187e-06, | |
| "loss": 0.0067, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 11.191950464396285, | |
| "grad_norm": 0.7057642530613543, | |
| "learning_rate": 1.0083190094621719e-06, | |
| "loss": 0.0073, | |
| "step": 1813 | |
| }, | |
| { | |
| "epoch": 11.198142414860682, | |
| "grad_norm": 0.42926277169282456, | |
| "learning_rate": 9.91639255248339e-07, | |
| "loss": 0.0055, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 11.204334365325078, | |
| "grad_norm": 0.41431734580291646, | |
| "learning_rate": 9.750972317002949e-07, | |
| "loss": 0.0065, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 11.210526315789474, | |
| "grad_norm": 0.5919073415860816, | |
| "learning_rate": 9.586929853069138e-07, | |
| "loss": 0.0091, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 11.21671826625387, | |
| "grad_norm": 0.8579825741603267, | |
| "learning_rate": 9.424265621698736e-07, | |
| "loss": 0.0077, | |
| "step": 1817 | |
| }, | |
| { | |
| "epoch": 11.222910216718267, | |
| "grad_norm": 0.3910087337186186, | |
| "learning_rate": 9.262980080035288e-07, | |
| "loss": 0.0086, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 11.229102167182663, | |
| "grad_norm": 0.5442690307510305, | |
| "learning_rate": 9.103073681347607e-07, | |
| "loss": 0.0059, | |
| "step": 1819 | |
| }, | |
| { | |
| "epoch": 11.235294117647058, | |
| "grad_norm": 0.4656439315898283, | |
| "learning_rate": 8.944546875028714e-07, | |
| "loss": 0.0074, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 11.241486068111454, | |
| "grad_norm": 0.5839593061344903, | |
| "learning_rate": 8.787400106594568e-07, | |
| "loss": 0.0073, | |
| "step": 1821 | |
| }, | |
| { | |
| "epoch": 11.24767801857585, | |
| "grad_norm": 0.5211620061867827, | |
| "learning_rate": 8.631633817682505e-07, | |
| "loss": 0.008, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 11.253869969040247, | |
| "grad_norm": 0.565491467227642, | |
| "learning_rate": 8.477248446050523e-07, | |
| "loss": 0.0074, | |
| "step": 1823 | |
| }, | |
| { | |
| "epoch": 11.260061919504643, | |
| "grad_norm": 0.6939557316334681, | |
| "learning_rate": 8.3242444255755e-07, | |
| "loss": 0.0067, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 11.26625386996904, | |
| "grad_norm": 0.4705178104025061, | |
| "learning_rate": 8.172622186252421e-07, | |
| "loss": 0.005, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 11.272445820433436, | |
| "grad_norm": 0.34070656855140247, | |
| "learning_rate": 8.02238215419282e-07, | |
| "loss": 0.0062, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 11.278637770897832, | |
| "grad_norm": 0.4739816540016086, | |
| "learning_rate": 7.873524751624006e-07, | |
| "loss": 0.0071, | |
| "step": 1827 | |
| }, | |
| { | |
| "epoch": 11.284829721362229, | |
| "grad_norm": 0.5132177212112234, | |
| "learning_rate": 7.72605039688723e-07, | |
| "loss": 0.006, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 11.291021671826625, | |
| "grad_norm": 0.39156911709743286, | |
| "learning_rate": 7.579959504437184e-07, | |
| "loss": 0.0062, | |
| "step": 1829 | |
| }, | |
| { | |
| "epoch": 11.297213622291022, | |
| "grad_norm": 0.43103836317419303, | |
| "learning_rate": 7.435252484840505e-07, | |
| "loss": 0.0058, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 11.303405572755418, | |
| "grad_norm": 0.5129600013904935, | |
| "learning_rate": 7.291929744774495e-07, | |
| "loss": 0.0071, | |
| "step": 1831 | |
| }, | |
| { | |
| "epoch": 11.309597523219814, | |
| "grad_norm": 0.640816840030639, | |
| "learning_rate": 7.149991687026514e-07, | |
| "loss": 0.0083, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 11.31578947368421, | |
| "grad_norm": 0.7358989372167142, | |
| "learning_rate": 7.009438710491978e-07, | |
| "loss": 0.0066, | |
| "step": 1833 | |
| }, | |
| { | |
| "epoch": 11.321981424148607, | |
| "grad_norm": 0.5454191148891866, | |
| "learning_rate": 6.870271210174139e-07, | |
| "loss": 0.0078, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 11.328173374613003, | |
| "grad_norm": 0.5617217241243505, | |
| "learning_rate": 6.732489577182421e-07, | |
| "loss": 0.0073, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 11.3343653250774, | |
| "grad_norm": 0.4411947377739936, | |
| "learning_rate": 6.596094198731473e-07, | |
| "loss": 0.0059, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 11.340557275541796, | |
| "grad_norm": 0.4038338751853977, | |
| "learning_rate": 6.461085458140059e-07, | |
| "loss": 0.0056, | |
| "step": 1837 | |
| }, | |
| { | |
| "epoch": 11.346749226006192, | |
| "grad_norm": 0.45642980515517534, | |
| "learning_rate": 6.327463734830174e-07, | |
| "loss": 0.007, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 11.352941176470589, | |
| "grad_norm": 0.712071078719854, | |
| "learning_rate": 6.195229404325542e-07, | |
| "loss": 0.0079, | |
| "step": 1839 | |
| }, | |
| { | |
| "epoch": 11.359133126934985, | |
| "grad_norm": 0.40511189890054594, | |
| "learning_rate": 6.064382838251059e-07, | |
| "loss": 0.0069, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 11.365325077399381, | |
| "grad_norm": 1.6722730305147262, | |
| "learning_rate": 5.934924404331355e-07, | |
| "loss": 0.0068, | |
| "step": 1841 | |
| }, | |
| { | |
| "epoch": 11.371517027863778, | |
| "grad_norm": 0.77392054804963, | |
| "learning_rate": 5.806854466390011e-07, | |
| "loss": 0.0075, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 11.377708978328174, | |
| "grad_norm": 0.509056072217164, | |
| "learning_rate": 5.680173384348453e-07, | |
| "loss": 0.0058, | |
| "step": 1843 | |
| }, | |
| { | |
| "epoch": 11.38390092879257, | |
| "grad_norm": 0.37899654795682763, | |
| "learning_rate": 5.554881514224897e-07, | |
| "loss": 0.0049, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 11.390092879256965, | |
| "grad_norm": 0.46168661436291886, | |
| "learning_rate": 5.430979208133402e-07, | |
| "loss": 0.0081, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 11.396284829721361, | |
| "grad_norm": 0.8165580802526049, | |
| "learning_rate": 5.30846681428282e-07, | |
| "loss": 0.0066, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 11.402476780185758, | |
| "grad_norm": 0.4790357107913533, | |
| "learning_rate": 5.187344676976014e-07, | |
| "loss": 0.0068, | |
| "step": 1847 | |
| }, | |
| { | |
| "epoch": 11.408668730650154, | |
| "grad_norm": 0.46815040147214804, | |
| "learning_rate": 5.067613136608473e-07, | |
| "loss": 0.0093, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 11.41486068111455, | |
| "grad_norm": 0.5326417302012959, | |
| "learning_rate": 4.949272529667926e-07, | |
| "loss": 0.0065, | |
| "step": 1849 | |
| }, | |
| { | |
| "epoch": 11.421052631578947, | |
| "grad_norm": 0.5816630635354162, | |
| "learning_rate": 4.832323188732835e-07, | |
| "loss": 0.0075, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 11.427244582043343, | |
| "grad_norm": 0.48427773798496576, | |
| "learning_rate": 4.7167654424718487e-07, | |
| "loss": 0.0068, | |
| "step": 1851 | |
| }, | |
| { | |
| "epoch": 11.43343653250774, | |
| "grad_norm": 0.5497025572817426, | |
| "learning_rate": 4.6025996156428e-07, | |
| "loss": 0.0078, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 11.439628482972136, | |
| "grad_norm": 0.5510500182306229, | |
| "learning_rate": 4.4898260290915927e-07, | |
| "loss": 0.0075, | |
| "step": 1853 | |
| }, | |
| { | |
| "epoch": 11.445820433436532, | |
| "grad_norm": 0.44627509003732324, | |
| "learning_rate": 4.3784449997515406e-07, | |
| "loss": 0.0077, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 11.452012383900929, | |
| "grad_norm": 0.4334395348335764, | |
| "learning_rate": 4.2684568406423654e-07, | |
| "loss": 0.0061, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 11.458204334365325, | |
| "grad_norm": 0.4842234079078647, | |
| "learning_rate": 4.159861860869307e-07, | |
| "loss": 0.0068, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 11.464396284829721, | |
| "grad_norm": 0.45320319836783474, | |
| "learning_rate": 4.0526603656223516e-07, | |
| "loss": 0.0073, | |
| "step": 1857 | |
| }, | |
| { | |
| "epoch": 11.470588235294118, | |
| "grad_norm": 0.42575539666611417, | |
| "learning_rate": 3.946852656175226e-07, | |
| "loss": 0.0068, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 11.476780185758514, | |
| "grad_norm": 0.4652897286828862, | |
| "learning_rate": 3.8424390298846815e-07, | |
| "loss": 0.0071, | |
| "step": 1859 | |
| }, | |
| { | |
| "epoch": 11.48297213622291, | |
| "grad_norm": 0.5116973817438624, | |
| "learning_rate": 3.739419780189657e-07, | |
| "loss": 0.0085, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 11.489164086687307, | |
| "grad_norm": 0.6935134709789724, | |
| "learning_rate": 3.6377951966102276e-07, | |
| "loss": 0.007, | |
| "step": 1861 | |
| }, | |
| { | |
| "epoch": 11.495356037151703, | |
| "grad_norm": 0.41416047318174065, | |
| "learning_rate": 3.537565564747103e-07, | |
| "loss": 0.0074, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 11.5015479876161, | |
| "grad_norm": 0.5586880199039042, | |
| "learning_rate": 3.4387311662807397e-07, | |
| "loss": 0.0073, | |
| "step": 1863 | |
| }, | |
| { | |
| "epoch": 11.507739938080496, | |
| "grad_norm": 0.4896846203221236, | |
| "learning_rate": 3.341292278970398e-07, | |
| "loss": 0.0065, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 11.513931888544892, | |
| "grad_norm": 0.5620994205712971, | |
| "learning_rate": 3.24524917665342e-07, | |
| "loss": 0.0066, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 11.520123839009289, | |
| "grad_norm": 0.5017437806583144, | |
| "learning_rate": 3.1506021292447287e-07, | |
| "loss": 0.0067, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 11.526315789473685, | |
| "grad_norm": 0.5215174105998739, | |
| "learning_rate": 3.057351402735553e-07, | |
| "loss": 0.0082, | |
| "step": 1867 | |
| }, | |
| { | |
| "epoch": 11.532507739938081, | |
| "grad_norm": 0.4829789050249859, | |
| "learning_rate": 2.965497259193151e-07, | |
| "loss": 0.0067, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 11.538699690402478, | |
| "grad_norm": 0.5822395608103282, | |
| "learning_rate": 2.8750399567599173e-07, | |
| "loss": 0.0067, | |
| "step": 1869 | |
| }, | |
| { | |
| "epoch": 11.544891640866872, | |
| "grad_norm": 0.45867408894473916, | |
| "learning_rate": 2.7859797496523897e-07, | |
| "loss": 0.0072, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 11.551083591331269, | |
| "grad_norm": 0.5205589054718633, | |
| "learning_rate": 2.6983168881611895e-07, | |
| "loss": 0.0055, | |
| "step": 1871 | |
| }, | |
| { | |
| "epoch": 11.557275541795665, | |
| "grad_norm": 0.39738862312410056, | |
| "learning_rate": 2.612051618649525e-07, | |
| "loss": 0.006, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 11.563467492260061, | |
| "grad_norm": 0.5013445636761662, | |
| "learning_rate": 2.527184183553022e-07, | |
| "loss": 0.0079, | |
| "step": 1873 | |
| }, | |
| { | |
| "epoch": 11.569659442724458, | |
| "grad_norm": 0.5104992056179086, | |
| "learning_rate": 2.443714821379006e-07, | |
| "loss": 0.0085, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 11.575851393188854, | |
| "grad_norm": 0.691277343224864, | |
| "learning_rate": 2.3616437667055015e-07, | |
| "loss": 0.008, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 11.58204334365325, | |
| "grad_norm": 0.4802053768271533, | |
| "learning_rate": 2.2809712501810632e-07, | |
| "loss": 0.0067, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 11.588235294117647, | |
| "grad_norm": 0.605809537482388, | |
| "learning_rate": 2.2016974985236693e-07, | |
| "loss": 0.0071, | |
| "step": 1877 | |
| }, | |
| { | |
| "epoch": 11.594427244582043, | |
| "grad_norm": 0.7705825543183333, | |
| "learning_rate": 2.1238227345202754e-07, | |
| "loss": 0.0073, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 11.60061919504644, | |
| "grad_norm": 0.4715519242573243, | |
| "learning_rate": 2.047347177026371e-07, | |
| "loss": 0.007, | |
| "step": 1879 | |
| }, | |
| { | |
| "epoch": 11.606811145510836, | |
| "grad_norm": 1.0390341494339923, | |
| "learning_rate": 1.9722710409650347e-07, | |
| "loss": 0.007, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 11.613003095975232, | |
| "grad_norm": 0.44854359389428083, | |
| "learning_rate": 1.898594537326437e-07, | |
| "loss": 0.005, | |
| "step": 1881 | |
| }, | |
| { | |
| "epoch": 11.619195046439629, | |
| "grad_norm": 0.38508653976333107, | |
| "learning_rate": 1.8263178731675047e-07, | |
| "loss": 0.0063, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 11.625386996904025, | |
| "grad_norm": 0.6846671924981257, | |
| "learning_rate": 1.7554412516108677e-07, | |
| "loss": 0.0075, | |
| "step": 1883 | |
| }, | |
| { | |
| "epoch": 11.631578947368421, | |
| "grad_norm": 0.4785043704303202, | |
| "learning_rate": 1.685964871844692e-07, | |
| "loss": 0.007, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 11.637770897832818, | |
| "grad_norm": 0.5534942405207746, | |
| "learning_rate": 1.6178889291220133e-07, | |
| "loss": 0.0066, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 11.643962848297214, | |
| "grad_norm": 0.6334060900627472, | |
| "learning_rate": 1.5512136147600164e-07, | |
| "loss": 0.0067, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 11.65015479876161, | |
| "grad_norm": 0.5157177078600752, | |
| "learning_rate": 1.4859391161397007e-07, | |
| "loss": 0.0065, | |
| "step": 1887 | |
| }, | |
| { | |
| "epoch": 11.656346749226007, | |
| "grad_norm": 0.45312969046876567, | |
| "learning_rate": 1.4220656167053258e-07, | |
| "loss": 0.0079, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 11.662538699690403, | |
| "grad_norm": 0.5352641681979426, | |
| "learning_rate": 1.3595932959638013e-07, | |
| "loss": 0.0069, | |
| "step": 1889 | |
| }, | |
| { | |
| "epoch": 11.6687306501548, | |
| "grad_norm": 0.42825165496945455, | |
| "learning_rate": 1.2985223294841865e-07, | |
| "loss": 0.0068, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 11.674922600619196, | |
| "grad_norm": 0.46191991329180665, | |
| "learning_rate": 1.2388528888973017e-07, | |
| "loss": 0.0063, | |
| "step": 1891 | |
| }, | |
| { | |
| "epoch": 11.681114551083592, | |
| "grad_norm": 0.5332588937649859, | |
| "learning_rate": 1.1805851418952296e-07, | |
| "loss": 0.0056, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 11.687306501547988, | |
| "grad_norm": 0.5317234947401625, | |
| "learning_rate": 1.1237192522307593e-07, | |
| "loss": 0.006, | |
| "step": 1893 | |
| }, | |
| { | |
| "epoch": 11.693498452012385, | |
| "grad_norm": 0.49736650975887164, | |
| "learning_rate": 1.068255379716887e-07, | |
| "loss": 0.0081, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 11.69969040247678, | |
| "grad_norm": 0.47643332055864485, | |
| "learning_rate": 1.0141936802265939e-07, | |
| "loss": 0.007, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 11.705882352941176, | |
| "grad_norm": 0.41908853217415565, | |
| "learning_rate": 9.615343056922354e-08, | |
| "loss": 0.0059, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 11.712074303405572, | |
| "grad_norm": 0.5727683236930392, | |
| "learning_rate": 9.102774041049866e-08, | |
| "loss": 0.0063, | |
| "step": 1897 | |
| }, | |
| { | |
| "epoch": 11.718266253869968, | |
| "grad_norm": 0.43881012020217974, | |
| "learning_rate": 8.604231195147861e-08, | |
| "loss": 0.007, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 11.724458204334365, | |
| "grad_norm": 0.434012984801701, | |
| "learning_rate": 8.119715920296145e-08, | |
| "loss": 0.0071, | |
| "step": 1899 | |
| }, | |
| { | |
| "epoch": 11.730650154798761, | |
| "grad_norm": 0.5668726891302649, | |
| "learning_rate": 7.649229578152728e-08, | |
| "loss": 0.0056, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 11.736842105263158, | |
| "grad_norm": 0.5656514949124021, | |
| "learning_rate": 7.192773490948823e-08, | |
| "loss": 0.0072, | |
| "step": 1901 | |
| }, | |
| { | |
| "epoch": 11.743034055727554, | |
| "grad_norm": 0.6164067533137532, | |
| "learning_rate": 6.750348941486073e-08, | |
| "loss": 0.0092, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 11.74922600619195, | |
| "grad_norm": 1.2472133137974002, | |
| "learning_rate": 6.321957173132664e-08, | |
| "loss": 0.0066, | |
| "step": 1903 | |
| }, | |
| { | |
| "epoch": 11.755417956656347, | |
| "grad_norm": 0.4392934542646206, | |
| "learning_rate": 5.9075993898188854e-08, | |
| "loss": 0.0075, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 11.761609907120743, | |
| "grad_norm": 0.49472358771014996, | |
| "learning_rate": 5.507276756036017e-08, | |
| "loss": 0.0072, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 11.76780185758514, | |
| "grad_norm": 1.1317299783753785, | |
| "learning_rate": 5.1209903968302276e-08, | |
| "loss": 0.0067, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 11.773993808049536, | |
| "grad_norm": 0.5987121278705873, | |
| "learning_rate": 4.7487413978025696e-08, | |
| "loss": 0.0059, | |
| "step": 1907 | |
| }, | |
| { | |
| "epoch": 11.780185758513932, | |
| "grad_norm": 0.4778726713011685, | |
| "learning_rate": 4.390530805102322e-08, | |
| "loss": 0.0069, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 11.786377708978328, | |
| "grad_norm": 0.5588314148416086, | |
| "learning_rate": 4.0463596254269875e-08, | |
| "loss": 0.0063, | |
| "step": 1909 | |
| }, | |
| { | |
| "epoch": 11.792569659442725, | |
| "grad_norm": 0.4740418584164989, | |
| "learning_rate": 3.7162288260178536e-08, | |
| "loss": 0.0094, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 11.798761609907121, | |
| "grad_norm": 0.6334094812461899, | |
| "learning_rate": 3.4001393346588806e-08, | |
| "loss": 0.0063, | |
| "step": 1911 | |
| }, | |
| { | |
| "epoch": 11.804953560371517, | |
| "grad_norm": 0.8497258020172912, | |
| "learning_rate": 3.098092039671707e-08, | |
| "loss": 0.0097, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 11.811145510835914, | |
| "grad_norm": 0.5492510215863654, | |
| "learning_rate": 2.8100877899156498e-08, | |
| "loss": 0.0069, | |
| "step": 1913 | |
| }, | |
| { | |
| "epoch": 11.81733746130031, | |
| "grad_norm": 0.46923531975887417, | |
| "learning_rate": 2.5361273947827058e-08, | |
| "loss": 0.0087, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 11.823529411764707, | |
| "grad_norm": 0.5954689826520188, | |
| "learning_rate": 2.2762116241981103e-08, | |
| "loss": 0.0072, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 11.829721362229103, | |
| "grad_norm": 0.6339550310655667, | |
| "learning_rate": 2.0303412086164485e-08, | |
| "loss": 0.0072, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 11.8359133126935, | |
| "grad_norm": 0.4144783163072785, | |
| "learning_rate": 1.7985168390194372e-08, | |
| "loss": 0.0056, | |
| "step": 1917 | |
| }, | |
| { | |
| "epoch": 11.842105263157894, | |
| "grad_norm": 0.40612660529647476, | |
| "learning_rate": 1.580739166914813e-08, | |
| "loss": 0.0055, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 11.848297213622292, | |
| "grad_norm": 0.4767050444962352, | |
| "learning_rate": 1.3770088043335572e-08, | |
| "loss": 0.0081, | |
| "step": 1919 | |
| }, | |
| { | |
| "epoch": 11.854489164086687, | |
| "grad_norm": 0.6771418638491573, | |
| "learning_rate": 1.187326323830451e-08, | |
| "loss": 0.0059, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 11.860681114551083, | |
| "grad_norm": 0.5268183349235755, | |
| "learning_rate": 1.01169225847908e-08, | |
| "loss": 0.0084, | |
| "step": 1921 | |
| }, | |
| { | |
| "epoch": 11.86687306501548, | |
| "grad_norm": 0.5714529234379931, | |
| "learning_rate": 8.501071018729434e-09, | |
| "loss": 0.0085, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 11.873065015479876, | |
| "grad_norm": 0.641796851723476, | |
| "learning_rate": 7.025713081232344e-09, | |
| "loss": 0.0061, | |
| "step": 1923 | |
| }, | |
| { | |
| "epoch": 11.879256965944272, | |
| "grad_norm": 0.5786492854731642, | |
| "learning_rate": 5.690852918566192e-09, | |
| "loss": 0.0074, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 11.885448916408668, | |
| "grad_norm": 0.4395964372874901, | |
| "learning_rate": 4.496494282157926e-09, | |
| "loss": 0.0077, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 11.891640866873065, | |
| "grad_norm": 0.6144859848178597, | |
| "learning_rate": 3.4426405285725717e-09, | |
| "loss": 0.0062, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 11.897832817337461, | |
| "grad_norm": 0.45027881629827304, | |
| "learning_rate": 2.529294619513234e-09, | |
| "loss": 0.0053, | |
| "step": 1927 | |
| }, | |
| { | |
| "epoch": 11.904024767801857, | |
| "grad_norm": 0.4100958029737214, | |
| "learning_rate": 1.7564591217933412e-09, | |
| "loss": 0.0071, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 11.910216718266254, | |
| "grad_norm": 0.49678388184393957, | |
| "learning_rate": 1.1241362073588502e-09, | |
| "loss": 0.0084, | |
| "step": 1929 | |
| }, | |
| { | |
| "epoch": 11.91640866873065, | |
| "grad_norm": 0.5684024725276834, | |
| "learning_rate": 6.323276532604893e-10, | |
| "loss": 0.0067, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 11.922600619195046, | |
| "grad_norm": 0.5054176005019508, | |
| "learning_rate": 2.8103484164820893e-10, | |
| "loss": 0.0071, | |
| "step": 1931 | |
| }, | |
| { | |
| "epoch": 11.928792569659443, | |
| "grad_norm": 0.6033011906551876, | |
| "learning_rate": 7.025875977673124e-11, | |
| "loss": 0.0063, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 11.928792569659443, | |
| "step": 1932, | |
| "total_flos": 6213184927956992.0, | |
| "train_loss": 1.5306146964587475, | |
| "train_runtime": 72417.837, | |
| "train_samples_per_second": 3.424, | |
| "train_steps_per_second": 0.027 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 1932, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 12, | |
| "save_steps": 50000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6213184927956992.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |