| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.541516245487364, |
| "eval_steps": 770, |
| "global_step": 770, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.007220216606498195, |
| "grad_norm": 2.68963885307312, |
| "learning_rate": 0.0, |
| "loss": 1.880106806755066, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.01444043321299639, |
| "grad_norm": 2.3691370487213135, |
| "learning_rate": 8e-05, |
| "loss": 1.882678508758545, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.021660649819494584, |
| "grad_norm": 3.7024030685424805, |
| "learning_rate": 0.00016, |
| "loss": 1.9426246881484985, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.02888086642599278, |
| "grad_norm": 0.7646510601043701, |
| "learning_rate": 0.00024, |
| "loss": 1.6261038780212402, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.036101083032490974, |
| "grad_norm": 0.6628475189208984, |
| "learning_rate": 0.00032, |
| "loss": 1.478842854499817, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.04332129963898917, |
| "grad_norm": 0.7141556143760681, |
| "learning_rate": 0.0004, |
| "loss": 1.1376413106918335, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.05054151624548736, |
| "grad_norm": 0.7212001085281372, |
| "learning_rate": 0.0003999983135391464, |
| "loss": 1.0410869121551514, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.05776173285198556, |
| "grad_norm": 0.7515780925750732, |
| "learning_rate": 0.0003999932541850271, |
| "loss": 0.8246882557868958, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.06498194945848375, |
| "grad_norm": 2.474442481994629, |
| "learning_rate": 0.0003999848220229661, |
| "loss": 0.7787771821022034, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.07220216606498195, |
| "grad_norm": 1.2009592056274414, |
| "learning_rate": 0.0003999730171951686, |
| "loss": 0.6830324530601501, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.07942238267148015, |
| "grad_norm": 0.7545379996299744, |
| "learning_rate": 0.00039995783990071825, |
| "loss": 0.7438161969184875, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.08664259927797834, |
| "grad_norm": 0.9728625416755676, |
| "learning_rate": 0.0003999392903955744, |
| "loss": 0.6851149797439575, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.09386281588447654, |
| "grad_norm": 0.5509404540061951, |
| "learning_rate": 0.00039991736899256693, |
| "loss": 0.5853980183601379, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.10108303249097472, |
| "grad_norm": 0.715829074382782, |
| "learning_rate": 0.0003998920760613919, |
| "loss": 0.7855823040008545, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.10830324909747292, |
| "grad_norm": 0.6777167320251465, |
| "learning_rate": 0.0003998634120286046, |
| "loss": 0.5202292799949646, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.11552346570397112, |
| "grad_norm": 0.5348321199417114, |
| "learning_rate": 0.0003998313773776128, |
| "loss": 0.6527559757232666, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.12274368231046931, |
| "grad_norm": 0.4939132332801819, |
| "learning_rate": 0.0003997959726486683, |
| "loss": 0.584251880645752, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.1299638989169675, |
| "grad_norm": 0.47802454233169556, |
| "learning_rate": 0.00039975719843885805, |
| "loss": 0.5235528349876404, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.1371841155234657, |
| "grad_norm": 0.44072362780570984, |
| "learning_rate": 0.0003997150554020938, |
| "loss": 0.5184933543205261, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.1444043321299639, |
| "grad_norm": 0.4251253306865692, |
| "learning_rate": 0.00039966954424910147, |
| "loss": 0.6167617440223694, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.15162454873646208, |
| "grad_norm": 0.5198166370391846, |
| "learning_rate": 0.00039962066574740886, |
| "loss": 0.7412333488464355, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.1588447653429603, |
| "grad_norm": 0.44667476415634155, |
| "learning_rate": 0.00039956842072133266, |
| "loss": 0.6361726522445679, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.16606498194945848, |
| "grad_norm": 0.40787550806999207, |
| "learning_rate": 0.0003995128100519649, |
| "loss": 0.49531102180480957, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.17328519855595667, |
| "grad_norm": 0.4681910276412964, |
| "learning_rate": 0.00039945383467715766, |
| "loss": 0.584551990032196, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.18050541516245489, |
| "grad_norm": 0.41188451647758484, |
| "learning_rate": 0.00039939149559150753, |
| "loss": 0.6003429889678955, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.18772563176895307, |
| "grad_norm": 0.43262341618537903, |
| "learning_rate": 0.0003993257938463389, |
| "loss": 0.5725842118263245, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.19494584837545126, |
| "grad_norm": 0.45091742277145386, |
| "learning_rate": 0.0003992567305496859, |
| "loss": 0.6373569965362549, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.20216606498194944, |
| "grad_norm": 0.3687536120414734, |
| "learning_rate": 0.00039918430686627406, |
| "loss": 0.4958152174949646, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.20938628158844766, |
| "grad_norm": 0.3994904160499573, |
| "learning_rate": 0.00039910852401750036, |
| "loss": 0.6146813631057739, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.21660649819494585, |
| "grad_norm": 0.41312268376350403, |
| "learning_rate": 0.0003990293832814129, |
| "loss": 0.5313689708709717, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.22382671480144403, |
| "grad_norm": 0.37877994775772095, |
| "learning_rate": 0.0003989468859926893, |
| "loss": 0.5694329738616943, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.23104693140794225, |
| "grad_norm": 0.32251644134521484, |
| "learning_rate": 0.0003988610335426139, |
| "loss": 0.4476504921913147, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.23826714801444043, |
| "grad_norm": 0.40205422043800354, |
| "learning_rate": 0.0003987718273790548, |
| "loss": 0.4696302115917206, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.24548736462093862, |
| "grad_norm": 0.3664834201335907, |
| "learning_rate": 0.0003986792690064389, |
| "loss": 0.5304725170135498, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.2527075812274368, |
| "grad_norm": 0.5150560736656189, |
| "learning_rate": 0.000398583359985727, |
| "loss": 0.5886795520782471, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.259927797833935, |
| "grad_norm": 0.37544649839401245, |
| "learning_rate": 0.0003984841019343872, |
| "loss": 0.6374161839485168, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.26714801444043323, |
| "grad_norm": 0.3426123857498169, |
| "learning_rate": 0.0003983814965263676, |
| "loss": 0.507249116897583, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.2743682310469314, |
| "grad_norm": 0.36909183859825134, |
| "learning_rate": 0.00039827554549206836, |
| "loss": 0.5216564536094666, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.2815884476534296, |
| "grad_norm": 0.3864560127258301, |
| "learning_rate": 0.00039816625061831206, |
| "loss": 0.4607892632484436, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.2888086642599278, |
| "grad_norm": 0.3553301692008972, |
| "learning_rate": 0.0003980536137483141, |
| "loss": 0.6021184921264648, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.296028880866426, |
| "grad_norm": 0.3300604224205017, |
| "learning_rate": 0.0003979376367816511, |
| "loss": 0.49985307455062866, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.30324909747292417, |
| "grad_norm": 0.3894491493701935, |
| "learning_rate": 0.0003978183216742292, |
| "loss": 0.5403987169265747, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.3104693140794224, |
| "grad_norm": 0.32339245080947876, |
| "learning_rate": 0.0003976956704382511, |
| "loss": 0.5262795090675354, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.3176895306859206, |
| "grad_norm": 0.3121689260005951, |
| "learning_rate": 0.00039756968514218166, |
| "loss": 0.4988410174846649, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.3249097472924188, |
| "grad_norm": 0.37562957406044006, |
| "learning_rate": 0.0003974403679107138, |
| "loss": 0.5539376735687256, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.33212996389891697, |
| "grad_norm": 0.3560684323310852, |
| "learning_rate": 0.0003973077209247319, |
| "loss": 0.4829791784286499, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.33935018050541516, |
| "grad_norm": 0.3543923497200012, |
| "learning_rate": 0.0003971717464212755, |
| "loss": 0.4733068346977234, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.34657039711191334, |
| "grad_norm": 0.3253995180130005, |
| "learning_rate": 0.0003970324466935013, |
| "loss": 0.4318773150444031, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.35379061371841153, |
| "grad_norm": 0.3587929904460907, |
| "learning_rate": 0.0003968898240906447, |
| "loss": 0.6348429322242737, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.36101083032490977, |
| "grad_norm": 0.3772837519645691, |
| "learning_rate": 0.0003967438810179802, |
| "loss": 0.5192855596542358, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.36823104693140796, |
| "grad_norm": 0.3878256380558014, |
| "learning_rate": 0.0003965946199367804, |
| "loss": 0.4653000831604004, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.37545126353790614, |
| "grad_norm": 0.306704044342041, |
| "learning_rate": 0.00039644204336427506, |
| "loss": 0.46054646372795105, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.38267148014440433, |
| "grad_norm": 0.3290722072124481, |
| "learning_rate": 0.0003962861538736084, |
| "loss": 0.4637136459350586, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.3898916967509025, |
| "grad_norm": 0.3877376914024353, |
| "learning_rate": 0.00039612695409379557, |
| "loss": 0.6008591651916504, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.3971119133574007, |
| "grad_norm": 0.3738860785961151, |
| "learning_rate": 0.00039596444670967864, |
| "loss": 0.49255216121673584, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.4043321299638989, |
| "grad_norm": 0.43176719546318054, |
| "learning_rate": 0.0003957986344618809, |
| "loss": 0.6328399777412415, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.41155234657039713, |
| "grad_norm": 0.34965041279792786, |
| "learning_rate": 0.00039562952014676116, |
| "loss": 0.5274545550346375, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.4187725631768953, |
| "grad_norm": 0.3525114059448242, |
| "learning_rate": 0.000395457106616366, |
| "loss": 0.5174035429954529, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.4259927797833935, |
| "grad_norm": 0.36805179715156555, |
| "learning_rate": 0.0003952813967783822, |
| "loss": 0.5132086277008057, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.4332129963898917, |
| "grad_norm": 0.3320341110229492, |
| "learning_rate": 0.00039510239359608735, |
| "loss": 0.47466331720352173, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.4404332129963899, |
| "grad_norm": 0.36727970838546753, |
| "learning_rate": 0.0003949201000883001, |
| "loss": 0.5845056772232056, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.44765342960288806, |
| "grad_norm": 0.31494706869125366, |
| "learning_rate": 0.000394734519329329, |
| "loss": 0.5123739242553711, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.4548736462093863, |
| "grad_norm": 0.31959858536720276, |
| "learning_rate": 0.00039454565444892094, |
| "loss": 0.3888605237007141, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.4620938628158845, |
| "grad_norm": 0.4212755262851715, |
| "learning_rate": 0.0003943535086322083, |
| "loss": 0.6065781712532043, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.4693140794223827, |
| "grad_norm": 0.2938922643661499, |
| "learning_rate": 0.0003941580851196549, |
| "loss": 0.47488927841186523, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.47653429602888087, |
| "grad_norm": 0.27896228432655334, |
| "learning_rate": 0.00039395938720700196, |
| "loss": 0.36559349298477173, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.48375451263537905, |
| "grad_norm": 0.34284013509750366, |
| "learning_rate": 0.0003937574182452118, |
| "loss": 0.49944937229156494, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.49097472924187724, |
| "grad_norm": 0.3411049544811249, |
| "learning_rate": 0.00039355218164041194, |
| "loss": 0.5018436312675476, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.4981949458483754, |
| "grad_norm": 0.35431182384490967, |
| "learning_rate": 0.0003933436808538375, |
| "loss": 0.42643067240715027, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.5054151624548736, |
| "grad_norm": 0.3351145386695862, |
| "learning_rate": 0.00039313191940177256, |
| "loss": 0.5355833768844604, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.5126353790613718, |
| "grad_norm": 0.3065779507160187, |
| "learning_rate": 0.00039291690085549105, |
| "loss": 0.4329970180988312, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.51985559566787, |
| "grad_norm": 0.5208595395088196, |
| "learning_rate": 0.00039269862884119664, |
| "loss": 0.5850666165351868, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.5270758122743683, |
| "grad_norm": 0.3183553218841553, |
| "learning_rate": 0.00039247710703996144, |
| "loss": 0.5500473380088806, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.5342960288808665, |
| "grad_norm": 0.32788676023483276, |
| "learning_rate": 0.0003922523391876638, |
| "loss": 0.5200982093811035, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.5415162454873647, |
| "grad_norm": 0.2959986925125122, |
| "learning_rate": 0.00039202432907492566, |
| "loss": 0.4983074963092804, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.5487364620938628, |
| "grad_norm": 0.32182440161705017, |
| "learning_rate": 0.0003917930805470483, |
| "loss": 0.44362854957580566, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.555956678700361, |
| "grad_norm": 0.2816471755504608, |
| "learning_rate": 0.00039155859750394754, |
| "loss": 0.44515836238861084, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.5631768953068592, |
| "grad_norm": 0.290783554315567, |
| "learning_rate": 0.0003913208839000882, |
| "loss": 0.46542418003082275, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.5703971119133574, |
| "grad_norm": 0.366773396730423, |
| "learning_rate": 0.0003910799437444171, |
| "loss": 0.5370551943778992, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.5776173285198556, |
| "grad_norm": 0.32066911458969116, |
| "learning_rate": 0.0003908357811002957, |
| "loss": 0.5146384239196777, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.5848375451263538, |
| "grad_norm": 0.3420974016189575, |
| "learning_rate": 0.00039058840008543135, |
| "loss": 0.6218519806861877, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.592057761732852, |
| "grad_norm": 0.3156997263431549, |
| "learning_rate": 0.000390337804871808, |
| "loss": 0.549630343914032, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.5992779783393501, |
| "grad_norm": 0.3494798541069031, |
| "learning_rate": 0.00039008399968561597, |
| "loss": 0.5499352216720581, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.6064981949458483, |
| "grad_norm": 0.3652164936065674, |
| "learning_rate": 0.0003898269888071803, |
| "loss": 0.5562577843666077, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.6137184115523465, |
| "grad_norm": 0.34751319885253906, |
| "learning_rate": 0.0003895667765708887, |
| "loss": 0.5954532027244568, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.6209386281588448, |
| "grad_norm": 0.28338274359703064, |
| "learning_rate": 0.0003893033673651189, |
| "loss": 0.4894581735134125, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.628158844765343, |
| "grad_norm": 0.29019951820373535, |
| "learning_rate": 0.00038903676563216394, |
| "loss": 0.4635826349258423, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.6353790613718412, |
| "grad_norm": 0.2782176434993744, |
| "learning_rate": 0.00038876697586815763, |
| "loss": 0.4121658205986023, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.6425992779783394, |
| "grad_norm": 0.27983078360557556, |
| "learning_rate": 0.0003884940026229988, |
| "loss": 0.40188780426979065, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.6498194945848376, |
| "grad_norm": 0.2786664366722107, |
| "learning_rate": 0.0003882178505002744, |
| "loss": 0.44132208824157715, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.6570397111913358, |
| "grad_norm": 0.3355483412742615, |
| "learning_rate": 0.0003879385241571817, |
| "loss": 0.512315034866333, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.6642599277978339, |
| "grad_norm": 0.29554808139801025, |
| "learning_rate": 0.0003876560283044503, |
| "loss": 0.4540112018585205, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.6714801444043321, |
| "grad_norm": 0.3431720435619354, |
| "learning_rate": 0.00038737036770626214, |
| "loss": 0.45164141058921814, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.6787003610108303, |
| "grad_norm": 0.37090009450912476, |
| "learning_rate": 0.0003870815471801714, |
| "loss": 0.4878736734390259, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.6859205776173285, |
| "grad_norm": 0.35553082823753357, |
| "learning_rate": 0.0003867895715970231, |
| "loss": 0.5573399662971497, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.6931407942238267, |
| "grad_norm": 0.30914071202278137, |
| "learning_rate": 0.00038649444588087115, |
| "loss": 0.5417919754981995, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.7003610108303249, |
| "grad_norm": 0.27384090423583984, |
| "learning_rate": 0.0003861961750088954, |
| "loss": 0.4475242793560028, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.7075812274368231, |
| "grad_norm": 0.2919006049633026, |
| "learning_rate": 0.0003858947640113171, |
| "loss": 0.44341611862182617, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.7148014440433214, |
| "grad_norm": 0.33408623933792114, |
| "learning_rate": 0.000385590217971315, |
| "loss": 0.5056685209274292, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.7220216606498195, |
| "grad_norm": 0.3498390018939972, |
| "learning_rate": 0.0003852825420249386, |
| "loss": 0.45975226163864136, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.7292418772563177, |
| "grad_norm": 0.32381942868232727, |
| "learning_rate": 0.0003849717413610224, |
| "loss": 0.43437302112579346, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.7364620938628159, |
| "grad_norm": 0.30022308230400085, |
| "learning_rate": 0.0003846578212210979, |
| "loss": 0.41799411177635193, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.7436823104693141, |
| "grad_norm": 0.31061094999313354, |
| "learning_rate": 0.0003843407868993054, |
| "loss": 0.5682204365730286, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.7509025270758123, |
| "grad_norm": 0.31062883138656616, |
| "learning_rate": 0.0003840206437423047, |
| "loss": 0.3520132303237915, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.7581227436823105, |
| "grad_norm": 0.36826908588409424, |
| "learning_rate": 0.00038369739714918464, |
| "loss": 0.546819269657135, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.7653429602888087, |
| "grad_norm": 0.35304203629493713, |
| "learning_rate": 0.00038337105257137263, |
| "loss": 0.5633726716041565, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.7725631768953068, |
| "grad_norm": 0.2879215478897095, |
| "learning_rate": 0.00038304161551254207, |
| "loss": 0.42212507128715515, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.779783393501805, |
| "grad_norm": 0.32802048325538635, |
| "learning_rate": 0.0003827090915285202, |
| "loss": 0.4973101019859314, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.7870036101083032, |
| "grad_norm": 0.29462599754333496, |
| "learning_rate": 0.0003823734862271937, |
| "loss": 0.4543662667274475, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.7942238267148014, |
| "grad_norm": 0.34288832545280457, |
| "learning_rate": 0.0003820348052684146, |
| "loss": 0.5599743127822876, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.8014440433212996, |
| "grad_norm": 0.2871699333190918, |
| "learning_rate": 0.00038169305436390477, |
| "loss": 0.5132560133934021, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.8086642599277978, |
| "grad_norm": 0.3307580053806305, |
| "learning_rate": 0.00038134823927715933, |
| "loss": 0.5319223403930664, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.8158844765342961, |
| "grad_norm": 0.3292873799800873, |
| "learning_rate": 0.0003810003658233498, |
| "loss": 0.5933513045310974, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.8231046931407943, |
| "grad_norm": 0.34675127267837524, |
| "learning_rate": 0.0003806494398692258, |
| "loss": 0.6370081901550293, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.8303249097472925, |
| "grad_norm": 0.31837624311447144, |
| "learning_rate": 0.00038029546733301615, |
| "loss": 0.44530773162841797, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.8375451263537906, |
| "grad_norm": 0.3014152944087982, |
| "learning_rate": 0.0003799384541843291, |
| "loss": 0.5310161113739014, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.8447653429602888, |
| "grad_norm": 0.30693644285202026, |
| "learning_rate": 0.00037957840644405164, |
| "loss": 0.5011177659034729, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.851985559566787, |
| "grad_norm": 0.2840467095375061, |
| "learning_rate": 0.000379215330184248, |
| "loss": 0.37618446350097656, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.8592057761732852, |
| "grad_norm": 0.3380180597305298, |
| "learning_rate": 0.00037884923152805715, |
| "loss": 0.5536396503448486, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.8664259927797834, |
| "grad_norm": 0.30026137828826904, |
| "learning_rate": 0.00037848011664958956, |
| "loss": 0.5487097501754761, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.8736462093862816, |
| "grad_norm": 0.3990669250488281, |
| "learning_rate": 0.00037810799177382325, |
| "loss": 0.34712734818458557, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.8808664259927798, |
| "grad_norm": 0.3168427646160126, |
| "learning_rate": 0.0003777328631764986, |
| "loss": 0.44405245780944824, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.8880866425992779, |
| "grad_norm": 0.2761262059211731, |
| "learning_rate": 0.0003773547371840124, |
| "loss": 0.4112606942653656, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.8953068592057761, |
| "grad_norm": 0.27056002616882324, |
| "learning_rate": 0.00037697362017331164, |
| "loss": 0.45218032598495483, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.9025270758122743, |
| "grad_norm": 0.2833767831325531, |
| "learning_rate": 0.00037658951857178543, |
| "loss": 0.4833817481994629, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.9097472924187726, |
| "grad_norm": 0.30818742513656616, |
| "learning_rate": 0.0003762024388571569, |
| "loss": 0.4847618341445923, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.9169675090252708, |
| "grad_norm": 0.32447490096092224, |
| "learning_rate": 0.00037581238755737405, |
| "loss": 0.5068320035934448, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.924187725631769, |
| "grad_norm": 0.307781845331192, |
| "learning_rate": 0.0003754193712504992, |
| "loss": 0.455619752407074, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.9314079422382672, |
| "grad_norm": 0.29909971356391907, |
| "learning_rate": 0.00037502339656459853, |
| "loss": 0.5618020296096802, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.9386281588447654, |
| "grad_norm": 0.33699020743370056, |
| "learning_rate": 0.0003746244701776302, |
| "loss": 0.50435870885849, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.9458483754512635, |
| "grad_norm": 0.2932925820350647, |
| "learning_rate": 0.0003742225988173315, |
| "loss": 0.503471314907074, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.9530685920577617, |
| "grad_norm": 0.27647075057029724, |
| "learning_rate": 0.00037381778926110567, |
| "loss": 0.4979417324066162, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.9602888086642599, |
| "grad_norm": 0.23902934789657593, |
| "learning_rate": 0.00037341004833590734, |
| "loss": 0.4127359390258789, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.9675090252707581, |
| "grad_norm": 0.3308359980583191, |
| "learning_rate": 0.0003729993829181276, |
| "loss": 0.6199836730957031, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.9747292418772563, |
| "grad_norm": 0.30997487902641296, |
| "learning_rate": 0.00037258579993347795, |
| "loss": 0.5043972134590149, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.9819494584837545, |
| "grad_norm": 0.5225651264190674, |
| "learning_rate": 0.00037216930635687357, |
| "loss": 0.5078832507133484, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.9891696750902527, |
| "grad_norm": 0.2910468280315399, |
| "learning_rate": 0.00037174990921231557, |
| "loss": 0.5478032827377319, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.9963898916967509, |
| "grad_norm": 0.3448226749897003, |
| "learning_rate": 0.00037132761557277257, |
| "loss": 0.5461761355400085, |
| "step": 138 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.5731388926506042, |
| "learning_rate": 0.00037090243256006155, |
| "loss": 0.39478886127471924, |
| "step": 139 |
| }, |
| { |
| "epoch": 1.0072202166064983, |
| "grad_norm": 0.3017331659793854, |
| "learning_rate": 0.0003704743673447276, |
| "loss": 0.3729703426361084, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.0144404332129964, |
| "grad_norm": 0.29272574186325073, |
| "learning_rate": 0.0003700434271459229, |
| "loss": 0.45498228073120117, |
| "step": 141 |
| }, |
| { |
| "epoch": 1.0216606498194947, |
| "grad_norm": 0.2818097770214081, |
| "learning_rate": 0.0003696096192312852, |
| "loss": 0.37411680817604065, |
| "step": 142 |
| }, |
| { |
| "epoch": 1.0288808664259927, |
| "grad_norm": 0.31325557827949524, |
| "learning_rate": 0.00036917295091681526, |
| "loss": 0.4299224019050598, |
| "step": 143 |
| }, |
| { |
| "epoch": 1.036101083032491, |
| "grad_norm": 0.2971343994140625, |
| "learning_rate": 0.0003687334295667533, |
| "loss": 0.4103966951370239, |
| "step": 144 |
| }, |
| { |
| "epoch": 1.0433212996389891, |
| "grad_norm": 0.32004889845848083, |
| "learning_rate": 0.00036829106259345465, |
| "loss": 0.49494248628616333, |
| "step": 145 |
| }, |
| { |
| "epoch": 1.0505415162454874, |
| "grad_norm": 0.29688122868537903, |
| "learning_rate": 0.00036784585745726535, |
| "loss": 0.4550933241844177, |
| "step": 146 |
| }, |
| { |
| "epoch": 1.0577617328519855, |
| "grad_norm": 0.3322184681892395, |
| "learning_rate": 0.00036739782166639556, |
| "loss": 0.46367114782333374, |
| "step": 147 |
| }, |
| { |
| "epoch": 1.0649819494584838, |
| "grad_norm": 0.267844021320343, |
| "learning_rate": 0.00036694696277679367, |
| "loss": 0.33760106563568115, |
| "step": 148 |
| }, |
| { |
| "epoch": 1.0722021660649819, |
| "grad_norm": 0.3240787386894226, |
| "learning_rate": 0.0003664932883920182, |
| "loss": 0.43709027767181396, |
| "step": 149 |
| }, |
| { |
| "epoch": 1.0794223826714802, |
| "grad_norm": 0.29441356658935547, |
| "learning_rate": 0.00036603680616311015, |
| "loss": 0.4205693304538727, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.0866425992779782, |
| "grad_norm": 0.33195292949676514, |
| "learning_rate": 0.00036557752378846357, |
| "loss": 0.44472751021385193, |
| "step": 151 |
| }, |
| { |
| "epoch": 1.0938628158844765, |
| "grad_norm": 0.3113086223602295, |
| "learning_rate": 0.00036511544901369597, |
| "loss": 0.3894999921321869, |
| "step": 152 |
| }, |
| { |
| "epoch": 1.1010830324909748, |
| "grad_norm": 0.30926311016082764, |
| "learning_rate": 0.00036465058963151744, |
| "loss": 0.33893927931785583, |
| "step": 153 |
| }, |
| { |
| "epoch": 1.108303249097473, |
| "grad_norm": 0.31725767254829407, |
| "learning_rate": 0.00036418295348159967, |
| "loss": 0.4384872019290924, |
| "step": 154 |
| }, |
| { |
| "epoch": 1.1155234657039712, |
| "grad_norm": 0.3411625027656555, |
| "learning_rate": 0.00036371254845044297, |
| "loss": 0.4130849838256836, |
| "step": 155 |
| }, |
| { |
| "epoch": 1.1227436823104693, |
| "grad_norm": 0.3470022976398468, |
| "learning_rate": 0.0003632393824712444, |
| "loss": 0.4436883330345154, |
| "step": 156 |
| }, |
| { |
| "epoch": 1.1299638989169676, |
| "grad_norm": 0.31315702199935913, |
| "learning_rate": 0.00036276346352376266, |
| "loss": 0.39922523498535156, |
| "step": 157 |
| }, |
| { |
| "epoch": 1.1371841155234657, |
| "grad_norm": 0.36140701174736023, |
| "learning_rate": 0.00036228479963418465, |
| "loss": 0.44266432523727417, |
| "step": 158 |
| }, |
| { |
| "epoch": 1.144404332129964, |
| "grad_norm": 0.37621912360191345, |
| "learning_rate": 0.0003618033988749895, |
| "loss": 0.3703776001930237, |
| "step": 159 |
| }, |
| { |
| "epoch": 1.151624548736462, |
| "grad_norm": 0.40457242727279663, |
| "learning_rate": 0.0003613192693648125, |
| "loss": 0.48410341143608093, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.1588447653429603, |
| "grad_norm": 0.35907864570617676, |
| "learning_rate": 0.00036083241926830833, |
| "loss": 0.3249608874320984, |
| "step": 161 |
| }, |
| { |
| "epoch": 1.1660649819494584, |
| "grad_norm": 0.3328985869884491, |
| "learning_rate": 0.00036034285679601336, |
| "loss": 0.3642440140247345, |
| "step": 162 |
| }, |
| { |
| "epoch": 1.1732851985559567, |
| "grad_norm": 0.3639651834964752, |
| "learning_rate": 0.00035985059020420695, |
| "loss": 0.4125223457813263, |
| "step": 163 |
| }, |
| { |
| "epoch": 1.1805054151624548, |
| "grad_norm": 0.3122009038925171, |
| "learning_rate": 0.0003593556277947725, |
| "loss": 0.26333218812942505, |
| "step": 164 |
| }, |
| { |
| "epoch": 1.187725631768953, |
| "grad_norm": 0.3917737901210785, |
| "learning_rate": 0.0003588579779150572, |
| "loss": 0.41683149337768555, |
| "step": 165 |
| }, |
| { |
| "epoch": 1.1949458483754514, |
| "grad_norm": 0.3892008066177368, |
| "learning_rate": 0.00035835764895773164, |
| "loss": 0.34965166449546814, |
| "step": 166 |
| }, |
| { |
| "epoch": 1.2021660649819494, |
| "grad_norm": 0.32206663489341736, |
| "learning_rate": 0.0003578546493606477, |
| "loss": 0.3289134204387665, |
| "step": 167 |
| }, |
| { |
| "epoch": 1.2093862815884477, |
| "grad_norm": 0.373098760843277, |
| "learning_rate": 0.0003573489876066967, |
| "loss": 0.37044239044189453, |
| "step": 168 |
| }, |
| { |
| "epoch": 1.2166064981949458, |
| "grad_norm": 0.4301343262195587, |
| "learning_rate": 0.00035684067222366614, |
| "loss": 0.46339157223701477, |
| "step": 169 |
| }, |
| { |
| "epoch": 1.2238267148014441, |
| "grad_norm": 0.3458724915981293, |
| "learning_rate": 0.00035632971178409603, |
| "loss": 0.41736456751823425, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.2310469314079422, |
| "grad_norm": 0.45238223671913147, |
| "learning_rate": 0.0003558161149051341, |
| "loss": 0.4182761311531067, |
| "step": 171 |
| }, |
| { |
| "epoch": 1.2382671480144405, |
| "grad_norm": 0.32411810755729675, |
| "learning_rate": 0.0003552998902483907, |
| "loss": 0.30536049604415894, |
| "step": 172 |
| }, |
| { |
| "epoch": 1.2454873646209386, |
| "grad_norm": 0.40648043155670166, |
| "learning_rate": 0.0003547810465197926, |
| "loss": 0.45940107107162476, |
| "step": 173 |
| }, |
| { |
| "epoch": 1.2527075812274369, |
| "grad_norm": 0.34474775195121765, |
| "learning_rate": 0.00035425959246943614, |
| "loss": 0.4267578423023224, |
| "step": 174 |
| }, |
| { |
| "epoch": 1.259927797833935, |
| "grad_norm": 0.33473461866378784, |
| "learning_rate": 0.00035373553689143977, |
| "loss": 0.31750980019569397, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.2671480144404332, |
| "grad_norm": 0.29968079924583435, |
| "learning_rate": 0.0003532088886237956, |
| "loss": 0.3412264883518219, |
| "step": 176 |
| }, |
| { |
| "epoch": 1.2743682310469313, |
| "grad_norm": 0.3362952172756195, |
| "learning_rate": 0.00035267965654822063, |
| "loss": 0.347223162651062, |
| "step": 177 |
| }, |
| { |
| "epoch": 1.2815884476534296, |
| "grad_norm": 0.38441580533981323, |
| "learning_rate": 0.0003521478495900065, |
| "loss": 0.4132072925567627, |
| "step": 178 |
| }, |
| { |
| "epoch": 1.288808664259928, |
| "grad_norm": 0.3259715437889099, |
| "learning_rate": 0.00035161347671786947, |
| "loss": 0.3259287476539612, |
| "step": 179 |
| }, |
| { |
| "epoch": 1.296028880866426, |
| "grad_norm": 0.30564218759536743, |
| "learning_rate": 0.00035107654694379876, |
| "loss": 0.33284130692481995, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.303249097472924, |
| "grad_norm": 0.3464779853820801, |
| "learning_rate": 0.0003505370693229049, |
| "loss": 0.3613452613353729, |
| "step": 181 |
| }, |
| { |
| "epoch": 1.3104693140794224, |
| "grad_norm": 0.4113422632217407, |
| "learning_rate": 0.0003499950529532668, |
| "loss": 0.49952858686447144, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.3176895306859207, |
| "grad_norm": 0.3503414988517761, |
| "learning_rate": 0.0003494505069757782, |
| "loss": 0.43928611278533936, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.3249097472924187, |
| "grad_norm": 0.3231402337551117, |
| "learning_rate": 0.000348903440573994, |
| "loss": 0.35302072763442993, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.332129963898917, |
| "grad_norm": 0.3849830627441406, |
| "learning_rate": 0.00034835386297397486, |
| "loss": 0.42382097244262695, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.339350180505415, |
| "grad_norm": 0.35116714239120483, |
| "learning_rate": 0.00034780178344413185, |
| "loss": 0.3274853229522705, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.3465703971119134, |
| "grad_norm": 0.3941578269004822, |
| "learning_rate": 0.0003472472112950701, |
| "loss": 0.34617680311203003, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.3537906137184115, |
| "grad_norm": 0.32104700803756714, |
| "learning_rate": 0.00034669015587943184, |
| "loss": 0.39410364627838135, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.3610108303249098, |
| "grad_norm": 0.42431211471557617, |
| "learning_rate": 0.00034613062659173867, |
| "loss": 0.49896761775016785, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.3682310469314078, |
| "grad_norm": 0.4387979209423065, |
| "learning_rate": 0.00034556863286823284, |
| "loss": 0.4435005784034729, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.3754512635379061, |
| "grad_norm": 0.3983216881752014, |
| "learning_rate": 0.00034500418418671855, |
| "loss": 0.44415876269340515, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.3826714801444044, |
| "grad_norm": 0.30740320682525635, |
| "learning_rate": 0.00034443729006640184, |
| "loss": 0.36664414405822754, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.3898916967509025, |
| "grad_norm": 0.35397860407829285, |
| "learning_rate": 0.0003438679600677302, |
| "loss": 0.36680418252944946, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.3971119133574006, |
| "grad_norm": 0.3522591292858124, |
| "learning_rate": 0.0003432962037922312, |
| "loss": 0.3908054232597351, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.404332129963899, |
| "grad_norm": 0.36350661516189575, |
| "learning_rate": 0.0003427220308823505, |
| "loss": 0.34363672137260437, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.4115523465703972, |
| "grad_norm": 0.3392064869403839, |
| "learning_rate": 0.00034214545102128946, |
| "loss": 0.40172988176345825, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.4187725631768953, |
| "grad_norm": 0.3826808035373688, |
| "learning_rate": 0.0003415664739328418, |
| "loss": 0.4292600452899933, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.4259927797833936, |
| "grad_norm": 0.4102179706096649, |
| "learning_rate": 0.00034098510938122947, |
| "loss": 0.40238481760025024, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.4332129963898916, |
| "grad_norm": 0.38712596893310547, |
| "learning_rate": 0.00034040136717093796, |
| "loss": 0.3980088233947754, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.44043321299639, |
| "grad_norm": 0.35182803869247437, |
| "learning_rate": 0.0003398152571465512, |
| "loss": 0.3569096028804779, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.447653429602888, |
| "grad_norm": 0.4129750728607178, |
| "learning_rate": 0.0003392267891925854, |
| "loss": 0.4729324281215668, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.4548736462093863, |
| "grad_norm": 0.3427615463733673, |
| "learning_rate": 0.000338635973233322, |
| "loss": 0.38466876745224, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.4620938628158844, |
| "grad_norm": 0.29937732219696045, |
| "learning_rate": 0.0003380428192326411, |
| "loss": 0.24431845545768738, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.4693140794223827, |
| "grad_norm": 0.35563215613365173, |
| "learning_rate": 0.00033744733719385253, |
| "loss": 0.34825772047042847, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.476534296028881, |
| "grad_norm": 0.3395077586174011, |
| "learning_rate": 0.0003368495371595279, |
| "loss": 0.37704023718833923, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.483754512635379, |
| "grad_norm": 0.32358789443969727, |
| "learning_rate": 0.0003362494292113308, |
| "loss": 0.29866379499435425, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.4909747292418771, |
| "grad_norm": 0.44219517707824707, |
| "learning_rate": 0.00033564702346984684, |
| "loss": 0.5036117434501648, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.4981949458483754, |
| "grad_norm": 0.358549565076828, |
| "learning_rate": 0.00033504233009441287, |
| "loss": 0.42227745056152344, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.5054151624548737, |
| "grad_norm": 0.3784799575805664, |
| "learning_rate": 0.0003344353592829461, |
| "loss": 0.4267178773880005, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.5126353790613718, |
| "grad_norm": 0.3619599938392639, |
| "learning_rate": 0.00033382612127177166, |
| "loss": 0.4075395166873932, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.5198555956678699, |
| "grad_norm": 0.44484809041023254, |
| "learning_rate": 0.0003332146263354501, |
| "loss": 0.5016494989395142, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.5270758122743682, |
| "grad_norm": 0.34379443526268005, |
| "learning_rate": 0.00033260088478660407, |
| "loss": 0.317508339881897, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.5342960288808665, |
| "grad_norm": 0.4008065164089203, |
| "learning_rate": 0.0003319849069757446, |
| "loss": 0.3974913954734802, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.5415162454873648, |
| "grad_norm": 0.4006946384906769, |
| "learning_rate": 0.00033136670329109624, |
| "loss": 0.5162532925605774, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.5487364620938628, |
| "grad_norm": 0.32255908846855164, |
| "learning_rate": 0.0003307462841584223, |
| "loss": 0.37669965624809265, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.555956678700361, |
| "grad_norm": 0.7169090509414673, |
| "learning_rate": 0.0003301236600408484, |
| "loss": 0.4477896988391876, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.5631768953068592, |
| "grad_norm": 0.3694959580898285, |
| "learning_rate": 0.00032949884143868675, |
| "loss": 0.45744967460632324, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.5703971119133575, |
| "grad_norm": 0.3709769546985626, |
| "learning_rate": 0.0003288718388892583, |
| "loss": 0.4952532947063446, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.5776173285198556, |
| "grad_norm": 0.29159069061279297, |
| "learning_rate": 0.00032824266296671567, |
| "loss": 0.28994375467300415, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.5848375451263537, |
| "grad_norm": 0.3600374460220337, |
| "learning_rate": 0.0003276113242818645, |
| "loss": 0.43825557827949524, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.592057761732852, |
| "grad_norm": 0.2843645215034485, |
| "learning_rate": 0.0003269778334819846, |
| "loss": 0.32124435901641846, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.5992779783393503, |
| "grad_norm": 0.3688318431377411, |
| "learning_rate": 0.0003263422012506502, |
| "loss": 0.4742942750453949, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.6064981949458483, |
| "grad_norm": 0.3213002383708954, |
| "learning_rate": 0.00032570443830755015, |
| "loss": 0.42100387811660767, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.6137184115523464, |
| "grad_norm": 0.3240319490432739, |
| "learning_rate": 0.0003250645554083068, |
| "loss": 0.29403799772262573, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.6209386281588447, |
| "grad_norm": 0.3569975197315216, |
| "learning_rate": 0.0003244225633442948, |
| "loss": 0.4555712938308716, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.628158844765343, |
| "grad_norm": 0.43396008014678955, |
| "learning_rate": 0.00032377847294245895, |
| "loss": 0.4877527356147766, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.6353790613718413, |
| "grad_norm": 0.3694853186607361, |
| "learning_rate": 0.0003231322950651316, |
| "loss": 0.41299888491630554, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.6425992779783394, |
| "grad_norm": 0.3358525335788727, |
| "learning_rate": 0.00032248404060985, |
| "loss": 0.4551587998867035, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.6498194945848375, |
| "grad_norm": 0.38492903113365173, |
| "learning_rate": 0.0003218337205091715, |
| "loss": 0.4316423535346985, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.6570397111913358, |
| "grad_norm": 0.34664952754974365, |
| "learning_rate": 0.0003211813457304902, |
| "loss": 0.39314523339271545, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.664259927797834, |
| "grad_norm": 0.3254722058773041, |
| "learning_rate": 0.00032052692727585133, |
| "loss": 0.3327226936817169, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.6714801444043321, |
| "grad_norm": 0.4386709928512573, |
| "learning_rate": 0.0003198704761817658, |
| "loss": 0.4558185935020447, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.6787003610108302, |
| "grad_norm": 0.5447010397911072, |
| "learning_rate": 0.00031921200351902446, |
| "loss": 0.48867422342300415, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.6859205776173285, |
| "grad_norm": 0.2976516783237457, |
| "learning_rate": 0.000318551520392511, |
| "loss": 0.3376544117927551, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.6931407942238268, |
| "grad_norm": 0.3854595124721527, |
| "learning_rate": 0.00031788903794101477, |
| "loss": 0.45083147287368774, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.7003610108303249, |
| "grad_norm": 0.37051668763160706, |
| "learning_rate": 0.00031722456733704297, |
| "loss": 0.4483460485935211, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.707581227436823, |
| "grad_norm": 0.42628687620162964, |
| "learning_rate": 0.0003165581197866322, |
| "loss": 0.5173429250717163, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.7148014440433212, |
| "grad_norm": 0.44184717535972595, |
| "learning_rate": 0.0003158897065291596, |
| "loss": 0.45995786786079407, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.7220216606498195, |
| "grad_norm": 0.3687286376953125, |
| "learning_rate": 0.00031521933883715293, |
| "loss": 0.33321040868759155, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.7292418772563178, |
| "grad_norm": 0.3514050841331482, |
| "learning_rate": 0.00031454702801610103, |
| "loss": 0.3461155891418457, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.736462093862816, |
| "grad_norm": 0.4148181676864624, |
| "learning_rate": 0.0003138727854042627, |
| "loss": 0.4323638677597046, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.743682310469314, |
| "grad_norm": 0.44285446405410767, |
| "learning_rate": 0.0003131966223724756, |
| "loss": 0.4780181646347046, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.7509025270758123, |
| "grad_norm": 0.3196166157722473, |
| "learning_rate": 0.00031251855032396464, |
| "loss": 0.3380797505378723, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.7581227436823106, |
| "grad_norm": 0.48847395181655884, |
| "learning_rate": 0.0003118385806941494, |
| "loss": 0.4878506064414978, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.7653429602888087, |
| "grad_norm": 0.398876816034317, |
| "learning_rate": 0.0003111567249504515, |
| "loss": 0.4532281458377838, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.7725631768953067, |
| "grad_norm": 0.34218019247055054, |
| "learning_rate": 0.0003104729945921012, |
| "loss": 0.3738666772842407, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.779783393501805, |
| "grad_norm": 0.4071042835712433, |
| "learning_rate": 0.0003097874011499433, |
| "loss": 0.4437788724899292, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.7870036101083033, |
| "grad_norm": 0.35893604159355164, |
| "learning_rate": 0.0003090999561862428, |
| "loss": 0.35174691677093506, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.7942238267148014, |
| "grad_norm": 0.34495967626571655, |
| "learning_rate": 0.0003084106712944899, |
| "loss": 0.34072697162628174, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.8014440433212995, |
| "grad_norm": 0.4653945565223694, |
| "learning_rate": 0.0003077195580992045, |
| "loss": 0.45023709535598755, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.8086642599277978, |
| "grad_norm": 0.42124322056770325, |
| "learning_rate": 0.0003070266282557401, |
| "loss": 0.4846741557121277, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.815884476534296, |
| "grad_norm": 0.38208404183387756, |
| "learning_rate": 0.0003063318934500872, |
| "loss": 0.3900390565395355, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.8231046931407944, |
| "grad_norm": 0.3911641538143158, |
| "learning_rate": 0.0003056353653986764, |
| "loss": 0.3795917332172394, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.8303249097472925, |
| "grad_norm": 0.34185168147087097, |
| "learning_rate": 0.00030493705584818065, |
| "loss": 0.39876672625541687, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.8375451263537905, |
| "grad_norm": 0.4229717552661896, |
| "learning_rate": 0.000304236976575317, |
| "loss": 0.357430100440979, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.8447653429602888, |
| "grad_norm": 0.35156992077827454, |
| "learning_rate": 0.0003035351393866485, |
| "loss": 0.37869274616241455, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.8519855595667871, |
| "grad_norm": 0.3945099711418152, |
| "learning_rate": 0.0003028315561183845, |
| "loss": 0.45211949944496155, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.8592057761732852, |
| "grad_norm": 0.31473544239997864, |
| "learning_rate": 0.0003021262386361814, |
| "loss": 0.32624444365501404, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.8664259927797833, |
| "grad_norm": 0.4487520158290863, |
| "learning_rate": 0.00030141919883494247, |
| "loss": 0.48553428053855896, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.8736462093862816, |
| "grad_norm": 0.3847423791885376, |
| "learning_rate": 0.00030071044863861713, |
| "loss": 0.457511842250824, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.8808664259927799, |
| "grad_norm": 0.3822494447231293, |
| "learning_rate": 0.00030000000000000003, |
| "loss": 0.38700124621391296, |
| "step": 261 |
| }, |
| { |
| "epoch": 1.888086642599278, |
| "grad_norm": 0.37934961915016174, |
| "learning_rate": 0.0002992878649005293, |
| "loss": 0.4150720238685608, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.895306859205776, |
| "grad_norm": 0.37126392126083374, |
| "learning_rate": 0.00029857405535008467, |
| "loss": 0.3389851450920105, |
| "step": 263 |
| }, |
| { |
| "epoch": 1.9025270758122743, |
| "grad_norm": 0.3264472484588623, |
| "learning_rate": 0.0002978585833867847, |
| "loss": 0.327554315328598, |
| "step": 264 |
| }, |
| { |
| "epoch": 1.9097472924187726, |
| "grad_norm": 0.3889877200126648, |
| "learning_rate": 0.00029714146107678413, |
| "loss": 0.4393918812274933, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.916967509025271, |
| "grad_norm": 0.4926806092262268, |
| "learning_rate": 0.0002964227005140698, |
| "loss": 0.5468560457229614, |
| "step": 266 |
| }, |
| { |
| "epoch": 1.924187725631769, |
| "grad_norm": 0.4259951114654541, |
| "learning_rate": 0.0002957023138202573, |
| "loss": 0.39216533303260803, |
| "step": 267 |
| }, |
| { |
| "epoch": 1.931407942238267, |
| "grad_norm": 0.39791861176490784, |
| "learning_rate": 0.00029498031314438626, |
| "loss": 0.37104716897010803, |
| "step": 268 |
| }, |
| { |
| "epoch": 1.9386281588447654, |
| "grad_norm": 0.3682475686073303, |
| "learning_rate": 0.0002942567106627155, |
| "loss": 0.41986221075057983, |
| "step": 269 |
| }, |
| { |
| "epoch": 1.9458483754512637, |
| "grad_norm": 0.3230769634246826, |
| "learning_rate": 0.00029353151857851735, |
| "loss": 0.35548532009124756, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.9530685920577617, |
| "grad_norm": 0.4607864320278168, |
| "learning_rate": 0.0002928047491218727, |
| "loss": 0.4246275722980499, |
| "step": 271 |
| }, |
| { |
| "epoch": 1.9602888086642598, |
| "grad_norm": 0.4302678108215332, |
| "learning_rate": 0.0002920764145494638, |
| "loss": 0.42147913575172424, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.967509025270758, |
| "grad_norm": 0.36212071776390076, |
| "learning_rate": 0.00029134652714436814, |
| "loss": 0.4659682512283325, |
| "step": 273 |
| }, |
| { |
| "epoch": 1.9747292418772564, |
| "grad_norm": 0.31535375118255615, |
| "learning_rate": 0.00029061509921585107, |
| "loss": 0.38229790329933167, |
| "step": 274 |
| }, |
| { |
| "epoch": 1.9819494584837545, |
| "grad_norm": 0.3685518801212311, |
| "learning_rate": 0.0002898821430991582, |
| "loss": 0.46467846632003784, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.9891696750902526, |
| "grad_norm": 0.3998563885688782, |
| "learning_rate": 0.0002891476711553077, |
| "loss": 0.42970407009124756, |
| "step": 276 |
| }, |
| { |
| "epoch": 1.9963898916967509, |
| "grad_norm": 0.35800614953041077, |
| "learning_rate": 0.00028841169577088115, |
| "loss": 0.4408795237541199, |
| "step": 277 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.5006654858589172, |
| "learning_rate": 0.0002876742293578155, |
| "loss": 0.38447684049606323, |
| "step": 278 |
| }, |
| { |
| "epoch": 2.0072202166064983, |
| "grad_norm": 0.31330448389053345, |
| "learning_rate": 0.00028693528435319305, |
| "loss": 0.30373793840408325, |
| "step": 279 |
| }, |
| { |
| "epoch": 2.0144404332129966, |
| "grad_norm": 0.29946643114089966, |
| "learning_rate": 0.0002861948732190319, |
| "loss": 0.26699960231781006, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.0216606498194944, |
| "grad_norm": 0.26591944694519043, |
| "learning_rate": 0.0002854530084420762, |
| "loss": 0.23807109892368317, |
| "step": 281 |
| }, |
| { |
| "epoch": 2.0288808664259927, |
| "grad_norm": 0.34681931138038635, |
| "learning_rate": 0.00028470970253358487, |
| "loss": 0.32200539112091064, |
| "step": 282 |
| }, |
| { |
| "epoch": 2.036101083032491, |
| "grad_norm": 0.3542478382587433, |
| "learning_rate": 0.0002839649680291211, |
| "loss": 0.3349723815917969, |
| "step": 283 |
| }, |
| { |
| "epoch": 2.0433212996389893, |
| "grad_norm": 0.3007175624370575, |
| "learning_rate": 0.0002832188174883408, |
| "loss": 0.2172810435295105, |
| "step": 284 |
| }, |
| { |
| "epoch": 2.050541516245487, |
| "grad_norm": 0.381719708442688, |
| "learning_rate": 0.00028247126349478075, |
| "loss": 0.3058876693248749, |
| "step": 285 |
| }, |
| { |
| "epoch": 2.0577617328519855, |
| "grad_norm": 0.4056151211261749, |
| "learning_rate": 0.0002817223186556463, |
| "loss": 0.25402823090553284, |
| "step": 286 |
| }, |
| { |
| "epoch": 2.064981949458484, |
| "grad_norm": 0.5069922804832458, |
| "learning_rate": 0.00028097199560159913, |
| "loss": 0.2375117838382721, |
| "step": 287 |
| }, |
| { |
| "epoch": 2.072202166064982, |
| "grad_norm": 0.574663519859314, |
| "learning_rate": 0.00028022030698654374, |
| "loss": 0.3134710490703583, |
| "step": 288 |
| }, |
| { |
| "epoch": 2.07942238267148, |
| "grad_norm": 0.5239531993865967, |
| "learning_rate": 0.00027946726548741443, |
| "loss": 0.20423674583435059, |
| "step": 289 |
| }, |
| { |
| "epoch": 2.0866425992779782, |
| "grad_norm": 0.5845417976379395, |
| "learning_rate": 0.0002787128838039612, |
| "loss": 0.3297584354877472, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.0938628158844765, |
| "grad_norm": 0.47120949625968933, |
| "learning_rate": 0.00027795717465853585, |
| "loss": 0.32672739028930664, |
| "step": 291 |
| }, |
| { |
| "epoch": 2.101083032490975, |
| "grad_norm": 0.40136632323265076, |
| "learning_rate": 0.00027720015079587743, |
| "loss": 0.23871661722660065, |
| "step": 292 |
| }, |
| { |
| "epoch": 2.108303249097473, |
| "grad_norm": 0.5156406164169312, |
| "learning_rate": 0.00027644182498289677, |
| "loss": 0.23360338807106018, |
| "step": 293 |
| }, |
| { |
| "epoch": 2.115523465703971, |
| "grad_norm": 0.4302510917186737, |
| "learning_rate": 0.0002756822100084621, |
| "loss": 0.31100887060165405, |
| "step": 294 |
| }, |
| { |
| "epoch": 2.1227436823104693, |
| "grad_norm": 0.3673456609249115, |
| "learning_rate": 0.00027492131868318246, |
| "loss": 0.21802300214767456, |
| "step": 295 |
| }, |
| { |
| "epoch": 2.1299638989169676, |
| "grad_norm": 0.32138535380363464, |
| "learning_rate": 0.00027415916383919216, |
| "loss": 0.19008119404315948, |
| "step": 296 |
| }, |
| { |
| "epoch": 2.137184115523466, |
| "grad_norm": 0.3203730285167694, |
| "learning_rate": 0.00027339575832993444, |
| "loss": 0.1889929622411728, |
| "step": 297 |
| }, |
| { |
| "epoch": 2.1444043321299637, |
| "grad_norm": 0.4612288773059845, |
| "learning_rate": 0.0002726311150299443, |
| "loss": 0.22323200106620789, |
| "step": 298 |
| }, |
| { |
| "epoch": 2.151624548736462, |
| "grad_norm": 0.36709311604499817, |
| "learning_rate": 0.00027186524683463155, |
| "loss": 0.20466932654380798, |
| "step": 299 |
| }, |
| { |
| "epoch": 2.1588447653429603, |
| "grad_norm": 0.4253864586353302, |
| "learning_rate": 0.0002710981666600636, |
| "loss": 0.2431459128856659, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.1660649819494586, |
| "grad_norm": 0.4018993675708771, |
| "learning_rate": 0.00027032988744274736, |
| "loss": 0.2751820385456085, |
| "step": 301 |
| }, |
| { |
| "epoch": 2.1732851985559565, |
| "grad_norm": 0.4428861737251282, |
| "learning_rate": 0.000269560422139411, |
| "loss": 0.269814133644104, |
| "step": 302 |
| }, |
| { |
| "epoch": 2.1805054151624548, |
| "grad_norm": 0.47122061252593994, |
| "learning_rate": 0.00026878978372678564, |
| "loss": 0.22829411923885345, |
| "step": 303 |
| }, |
| { |
| "epoch": 2.187725631768953, |
| "grad_norm": 0.4426339268684387, |
| "learning_rate": 0.00026801798520138646, |
| "loss": 0.2637779414653778, |
| "step": 304 |
| }, |
| { |
| "epoch": 2.1949458483754514, |
| "grad_norm": 0.4073690176010132, |
| "learning_rate": 0.0002672450395792935, |
| "loss": 0.1904142051935196, |
| "step": 305 |
| }, |
| { |
| "epoch": 2.2021660649819497, |
| "grad_norm": 0.4896828234195709, |
| "learning_rate": 0.00026647095989593193, |
| "loss": 0.34650635719299316, |
| "step": 306 |
| }, |
| { |
| "epoch": 2.2093862815884475, |
| "grad_norm": 0.48900970816612244, |
| "learning_rate": 0.0002656957592058528, |
| "loss": 0.2542106807231903, |
| "step": 307 |
| }, |
| { |
| "epoch": 2.216606498194946, |
| "grad_norm": 0.4600171446800232, |
| "learning_rate": 0.0002649194505825121, |
| "loss": 0.21008341014385223, |
| "step": 308 |
| }, |
| { |
| "epoch": 2.223826714801444, |
| "grad_norm": 0.43380653858184814, |
| "learning_rate": 0.00026414204711805103, |
| "loss": 0.2683791518211365, |
| "step": 309 |
| }, |
| { |
| "epoch": 2.2310469314079424, |
| "grad_norm": 0.5792773365974426, |
| "learning_rate": 0.0002633635619230746, |
| "loss": 0.33160915970802307, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.2382671480144403, |
| "grad_norm": 0.5626965165138245, |
| "learning_rate": 0.0002625840081264309, |
| "loss": 0.26700398325920105, |
| "step": 311 |
| }, |
| { |
| "epoch": 2.2454873646209386, |
| "grad_norm": 0.4952887296676636, |
| "learning_rate": 0.00026180339887498953, |
| "loss": 0.2689879536628723, |
| "step": 312 |
| }, |
| { |
| "epoch": 2.252707581227437, |
| "grad_norm": 0.4501405656337738, |
| "learning_rate": 0.0002610217473334199, |
| "loss": 0.26021096110343933, |
| "step": 313 |
| }, |
| { |
| "epoch": 2.259927797833935, |
| "grad_norm": 0.36552777886390686, |
| "learning_rate": 0.00026023906668396933, |
| "loss": 0.26156651973724365, |
| "step": 314 |
| }, |
| { |
| "epoch": 2.2671480144404335, |
| "grad_norm": 0.3733731806278229, |
| "learning_rate": 0.00025945537012624054, |
| "loss": 0.21406950056552887, |
| "step": 315 |
| }, |
| { |
| "epoch": 2.2743682310469313, |
| "grad_norm": 0.4240867495536804, |
| "learning_rate": 0.00025867067087696923, |
| "loss": 0.32105109095573425, |
| "step": 316 |
| }, |
| { |
| "epoch": 2.2815884476534296, |
| "grad_norm": 0.4198870360851288, |
| "learning_rate": 0.000257884982169801, |
| "loss": 0.3047807216644287, |
| "step": 317 |
| }, |
| { |
| "epoch": 2.288808664259928, |
| "grad_norm": 0.4444144666194916, |
| "learning_rate": 0.00025709831725506845, |
| "loss": 0.23268437385559082, |
| "step": 318 |
| }, |
| { |
| "epoch": 2.2960288808664258, |
| "grad_norm": 0.41258376836776733, |
| "learning_rate": 0.00025631068939956726, |
| "loss": 0.26838576793670654, |
| "step": 319 |
| }, |
| { |
| "epoch": 2.303249097472924, |
| "grad_norm": 0.5179259777069092, |
| "learning_rate": 0.00025552211188633293, |
| "loss": 0.2882261574268341, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.3104693140794224, |
| "grad_norm": 0.4838998019695282, |
| "learning_rate": 0.0002547325980144166, |
| "loss": 0.30366814136505127, |
| "step": 321 |
| }, |
| { |
| "epoch": 2.3176895306859207, |
| "grad_norm": 0.4862940311431885, |
| "learning_rate": 0.0002539421610986605, |
| "loss": 0.29283270239830017, |
| "step": 322 |
| }, |
| { |
| "epoch": 2.324909747292419, |
| "grad_norm": 0.441685289144516, |
| "learning_rate": 0.000253150814469474, |
| "loss": 0.22393643856048584, |
| "step": 323 |
| }, |
| { |
| "epoch": 2.332129963898917, |
| "grad_norm": 0.39844486117362976, |
| "learning_rate": 0.0002523585714726081, |
| "loss": 0.17744283378124237, |
| "step": 324 |
| }, |
| { |
| "epoch": 2.339350180505415, |
| "grad_norm": 0.4977899491786957, |
| "learning_rate": 0.0002515654454689307, |
| "loss": 0.24069495499134064, |
| "step": 325 |
| }, |
| { |
| "epoch": 2.3465703971119134, |
| "grad_norm": 0.4101859927177429, |
| "learning_rate": 0.0002507714498342016, |
| "loss": 0.25044453144073486, |
| "step": 326 |
| }, |
| { |
| "epoch": 2.3537906137184117, |
| "grad_norm": 0.3521929085254669, |
| "learning_rate": 0.0002499765979588462, |
| "loss": 0.20651675760746002, |
| "step": 327 |
| }, |
| { |
| "epoch": 2.3610108303249095, |
| "grad_norm": 0.5269883871078491, |
| "learning_rate": 0.00024918090324773024, |
| "loss": 0.301647424697876, |
| "step": 328 |
| }, |
| { |
| "epoch": 2.368231046931408, |
| "grad_norm": 0.47672051191329956, |
| "learning_rate": 0.0002483843791199335, |
| "loss": 0.23506474494934082, |
| "step": 329 |
| }, |
| { |
| "epoch": 2.375451263537906, |
| "grad_norm": 0.5546545386314392, |
| "learning_rate": 0.00024758703900852375, |
| "loss": 0.3066970705986023, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.3826714801444044, |
| "grad_norm": 0.3792511820793152, |
| "learning_rate": 0.0002467888963603298, |
| "loss": 0.20687991380691528, |
| "step": 331 |
| }, |
| { |
| "epoch": 2.3898916967509027, |
| "grad_norm": 0.4949963092803955, |
| "learning_rate": 0.00024598996463571474, |
| "loss": 0.2865826189517975, |
| "step": 332 |
| }, |
| { |
| "epoch": 2.3971119133574006, |
| "grad_norm": 0.4250241219997406, |
| "learning_rate": 0.00024519025730834954, |
| "loss": 0.25499409437179565, |
| "step": 333 |
| }, |
| { |
| "epoch": 2.404332129963899, |
| "grad_norm": 0.30932316184043884, |
| "learning_rate": 0.00024438978786498524, |
| "loss": 0.18888786435127258, |
| "step": 334 |
| }, |
| { |
| "epoch": 2.411552346570397, |
| "grad_norm": 0.5183025002479553, |
| "learning_rate": 0.00024358856980522556, |
| "loss": 0.2950358986854553, |
| "step": 335 |
| }, |
| { |
| "epoch": 2.4187725631768955, |
| "grad_norm": 0.46398693323135376, |
| "learning_rate": 0.00024278661664129948, |
| "loss": 0.28177422285079956, |
| "step": 336 |
| }, |
| { |
| "epoch": 2.4259927797833933, |
| "grad_norm": 0.4938473701477051, |
| "learning_rate": 0.00024198394189783317, |
| "loss": 0.31616371870040894, |
| "step": 337 |
| }, |
| { |
| "epoch": 2.4332129963898916, |
| "grad_norm": 0.4439496099948883, |
| "learning_rate": 0.00024118055911162198, |
| "loss": 0.2702808380126953, |
| "step": 338 |
| }, |
| { |
| "epoch": 2.44043321299639, |
| "grad_norm": 0.45705899596214294, |
| "learning_rate": 0.00024037648183140205, |
| "loss": 0.33506399393081665, |
| "step": 339 |
| }, |
| { |
| "epoch": 2.4476534296028882, |
| "grad_norm": 0.4123782217502594, |
| "learning_rate": 0.00023957172361762199, |
| "loss": 0.2569669783115387, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.4548736462093865, |
| "grad_norm": 0.5178057551383972, |
| "learning_rate": 0.00023876629804221402, |
| "loss": 0.2357858568429947, |
| "step": 341 |
| }, |
| { |
| "epoch": 2.4620938628158844, |
| "grad_norm": 0.5206875205039978, |
| "learning_rate": 0.0002379602186883652, |
| "loss": 0.30847087502479553, |
| "step": 342 |
| }, |
| { |
| "epoch": 2.4693140794223827, |
| "grad_norm": 0.4264805018901825, |
| "learning_rate": 0.00023715349915028823, |
| "loss": 0.27529531717300415, |
| "step": 343 |
| }, |
| { |
| "epoch": 2.476534296028881, |
| "grad_norm": 0.5162002444267273, |
| "learning_rate": 0.00023634615303299233, |
| "loss": 0.3068305253982544, |
| "step": 344 |
| }, |
| { |
| "epoch": 2.483754512635379, |
| "grad_norm": 0.45099541544914246, |
| "learning_rate": 0.00023553819395205378, |
| "loss": 0.2681335508823395, |
| "step": 345 |
| }, |
| { |
| "epoch": 2.490974729241877, |
| "grad_norm": 0.49566006660461426, |
| "learning_rate": 0.00023472963553338613, |
| "loss": 0.2626950144767761, |
| "step": 346 |
| }, |
| { |
| "epoch": 2.4981949458483754, |
| "grad_norm": 0.4181882441043854, |
| "learning_rate": 0.00023392049141301055, |
| "loss": 0.2935839891433716, |
| "step": 347 |
| }, |
| { |
| "epoch": 2.5054151624548737, |
| "grad_norm": 0.4298352897167206, |
| "learning_rate": 0.000233110775236826, |
| "loss": 0.2565048038959503, |
| "step": 348 |
| }, |
| { |
| "epoch": 2.512635379061372, |
| "grad_norm": 0.46044519543647766, |
| "learning_rate": 0.00023230050066037872, |
| "loss": 0.2740374207496643, |
| "step": 349 |
| }, |
| { |
| "epoch": 2.51985559566787, |
| "grad_norm": 0.3774242401123047, |
| "learning_rate": 0.00023148968134863233, |
| "loss": 0.23688335716724396, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.527075812274368, |
| "grad_norm": 0.4179275631904602, |
| "learning_rate": 0.00023067833097573714, |
| "loss": 0.2116960883140564, |
| "step": 351 |
| }, |
| { |
| "epoch": 2.5342960288808665, |
| "grad_norm": 0.5179049968719482, |
| "learning_rate": 0.0002298664632247994, |
| "loss": 0.2446313500404358, |
| "step": 352 |
| }, |
| { |
| "epoch": 2.5415162454873648, |
| "grad_norm": 0.44997915625572205, |
| "learning_rate": 0.000229054091787651, |
| "loss": 0.3158394694328308, |
| "step": 353 |
| }, |
| { |
| "epoch": 2.5487364620938626, |
| "grad_norm": 0.41355574131011963, |
| "learning_rate": 0.0002282412303646183, |
| "loss": 0.2720823884010315, |
| "step": 354 |
| }, |
| { |
| "epoch": 2.555956678700361, |
| "grad_norm": 0.36182165145874023, |
| "learning_rate": 0.00022742789266429095, |
| "loss": 0.22511911392211914, |
| "step": 355 |
| }, |
| { |
| "epoch": 2.563176895306859, |
| "grad_norm": 0.43841102719306946, |
| "learning_rate": 0.00022661409240329076, |
| "loss": 0.21212677657604218, |
| "step": 356 |
| }, |
| { |
| "epoch": 2.5703971119133575, |
| "grad_norm": 0.4933500289916992, |
| "learning_rate": 0.0002257998433060407, |
| "loss": 0.3581770956516266, |
| "step": 357 |
| }, |
| { |
| "epoch": 2.577617328519856, |
| "grad_norm": 0.4325965344905853, |
| "learning_rate": 0.00022498515910453296, |
| "loss": 0.3252311646938324, |
| "step": 358 |
| }, |
| { |
| "epoch": 2.5848375451263537, |
| "grad_norm": 0.45968931913375854, |
| "learning_rate": 0.00022417005353809772, |
| "loss": 0.3097744286060333, |
| "step": 359 |
| }, |
| { |
| "epoch": 2.592057761732852, |
| "grad_norm": 0.43083667755126953, |
| "learning_rate": 0.00022335454035317124, |
| "loss": 0.25429341197013855, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.5992779783393503, |
| "grad_norm": 0.4504421353340149, |
| "learning_rate": 0.00022253863330306425, |
| "loss": 0.315784215927124, |
| "step": 361 |
| }, |
| { |
| "epoch": 2.606498194945848, |
| "grad_norm": 0.44943565130233765, |
| "learning_rate": 0.0002217223461477296, |
| "loss": 0.22878926992416382, |
| "step": 362 |
| }, |
| { |
| "epoch": 2.6137184115523464, |
| "grad_norm": 0.4279639720916748, |
| "learning_rate": 0.00022090569265353072, |
| "loss": 0.2623154819011688, |
| "step": 363 |
| }, |
| { |
| "epoch": 2.6209386281588447, |
| "grad_norm": 0.4907088279724121, |
| "learning_rate": 0.00022008868659300905, |
| "loss": 0.317205548286438, |
| "step": 364 |
| }, |
| { |
| "epoch": 2.628158844765343, |
| "grad_norm": 0.49302950501441956, |
| "learning_rate": 0.000219271341744652, |
| "loss": 0.30752891302108765, |
| "step": 365 |
| }, |
| { |
| "epoch": 2.6353790613718413, |
| "grad_norm": 0.38831761479377747, |
| "learning_rate": 0.00021845367189266042, |
| "loss": 0.27115532755851746, |
| "step": 366 |
| }, |
| { |
| "epoch": 2.6425992779783396, |
| "grad_norm": 0.47725188732147217, |
| "learning_rate": 0.00021763569082671635, |
| "loss": 0.20296484231948853, |
| "step": 367 |
| }, |
| { |
| "epoch": 2.6498194945848375, |
| "grad_norm": 0.4198381006717682, |
| "learning_rate": 0.00021681741234175027, |
| "loss": 0.22836127877235413, |
| "step": 368 |
| }, |
| { |
| "epoch": 2.6570397111913358, |
| "grad_norm": 0.5218875408172607, |
| "learning_rate": 0.00021599885023770835, |
| "loss": 0.29816269874572754, |
| "step": 369 |
| }, |
| { |
| "epoch": 2.664259927797834, |
| "grad_norm": 0.4321244955062866, |
| "learning_rate": 0.00021518001831932022, |
| "loss": 0.21631377935409546, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.671480144404332, |
| "grad_norm": 0.4036301374435425, |
| "learning_rate": 0.00021436093039586552, |
| "loss": 0.19903570413589478, |
| "step": 371 |
| }, |
| { |
| "epoch": 2.67870036101083, |
| "grad_norm": 0.543828010559082, |
| "learning_rate": 0.00021354160028094153, |
| "loss": 0.2922933101654053, |
| "step": 372 |
| }, |
| { |
| "epoch": 2.6859205776173285, |
| "grad_norm": 0.5146738290786743, |
| "learning_rate": 0.00021272204179222982, |
| "loss": 0.2749359607696533, |
| "step": 373 |
| }, |
| { |
| "epoch": 2.693140794223827, |
| "grad_norm": 0.47690504789352417, |
| "learning_rate": 0.00021190226875126352, |
| "loss": 0.296896755695343, |
| "step": 374 |
| }, |
| { |
| "epoch": 2.700361010830325, |
| "grad_norm": 0.545520007610321, |
| "learning_rate": 0.000211082294983194, |
| "loss": 0.27578210830688477, |
| "step": 375 |
| }, |
| { |
| "epoch": 2.707581227436823, |
| "grad_norm": 0.42757925391197205, |
| "learning_rate": 0.00021026213431655792, |
| "loss": 0.2315436154603958, |
| "step": 376 |
| }, |
| { |
| "epoch": 2.7148014440433212, |
| "grad_norm": 0.5044121146202087, |
| "learning_rate": 0.0002094418005830439, |
| "loss": 0.34600791335105896, |
| "step": 377 |
| }, |
| { |
| "epoch": 2.7220216606498195, |
| "grad_norm": 0.5155430436134338, |
| "learning_rate": 0.00020862130761725917, |
| "loss": 0.3247469663619995, |
| "step": 378 |
| }, |
| { |
| "epoch": 2.729241877256318, |
| "grad_norm": 0.5196051001548767, |
| "learning_rate": 0.00020780066925649634, |
| "loss": 0.3277815878391266, |
| "step": 379 |
| }, |
| { |
| "epoch": 2.7364620938628157, |
| "grad_norm": 0.44720444083213806, |
| "learning_rate": 0.00020697989934050025, |
| "loss": 0.25156158208847046, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.743682310469314, |
| "grad_norm": 0.43984708189964294, |
| "learning_rate": 0.00020615901171123412, |
| "loss": 0.28022634983062744, |
| "step": 381 |
| }, |
| { |
| "epoch": 2.7509025270758123, |
| "grad_norm": 0.47703853249549866, |
| "learning_rate": 0.00020533802021264648, |
| "loss": 0.2780344486236572, |
| "step": 382 |
| }, |
| { |
| "epoch": 2.7581227436823106, |
| "grad_norm": 0.43746253848075867, |
| "learning_rate": 0.00020451693869043763, |
| "loss": 0.23613542318344116, |
| "step": 383 |
| }, |
| { |
| "epoch": 2.765342960288809, |
| "grad_norm": 0.41468992829322815, |
| "learning_rate": 0.00020369578099182597, |
| "loss": 0.3185104429721832, |
| "step": 384 |
| }, |
| { |
| "epoch": 2.7725631768953067, |
| "grad_norm": 0.5149108171463013, |
| "learning_rate": 0.0002028745609653147, |
| "loss": 0.2768135070800781, |
| "step": 385 |
| }, |
| { |
| "epoch": 2.779783393501805, |
| "grad_norm": 0.47348129749298096, |
| "learning_rate": 0.00020205329246045798, |
| "loss": 0.31962212920188904, |
| "step": 386 |
| }, |
| { |
| "epoch": 2.7870036101083033, |
| "grad_norm": 0.4738944172859192, |
| "learning_rate": 0.00020123198932762778, |
| "loss": 0.23498407006263733, |
| "step": 387 |
| }, |
| { |
| "epoch": 2.794223826714801, |
| "grad_norm": 0.48899075388908386, |
| "learning_rate": 0.0002004106654177798, |
| "loss": 0.23857903480529785, |
| "step": 388 |
| }, |
| { |
| "epoch": 2.8014440433212995, |
| "grad_norm": 0.45867517590522766, |
| "learning_rate": 0.00019958933458222026, |
| "loss": 0.3246588706970215, |
| "step": 389 |
| }, |
| { |
| "epoch": 2.808664259927798, |
| "grad_norm": 0.5840234160423279, |
| "learning_rate": 0.0001987680106723723, |
| "loss": 0.2935522198677063, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.815884476534296, |
| "grad_norm": 0.3764658272266388, |
| "learning_rate": 0.00019794670753954204, |
| "loss": 0.2096547782421112, |
| "step": 391 |
| }, |
| { |
| "epoch": 2.8231046931407944, |
| "grad_norm": 0.44675493240356445, |
| "learning_rate": 0.00019712543903468535, |
| "loss": 0.2988222539424896, |
| "step": 392 |
| }, |
| { |
| "epoch": 2.8303249097472927, |
| "grad_norm": 0.43063509464263916, |
| "learning_rate": 0.00019630421900817407, |
| "loss": 0.2222481369972229, |
| "step": 393 |
| }, |
| { |
| "epoch": 2.8375451263537905, |
| "grad_norm": 0.47969475388526917, |
| "learning_rate": 0.00019548306130956242, |
| "loss": 0.26167353987693787, |
| "step": 394 |
| }, |
| { |
| "epoch": 2.844765342960289, |
| "grad_norm": 0.47235366702079773, |
| "learning_rate": 0.00019466197978735354, |
| "loss": 0.30541300773620605, |
| "step": 395 |
| }, |
| { |
| "epoch": 2.851985559566787, |
| "grad_norm": 0.47744160890579224, |
| "learning_rate": 0.00019384098828876598, |
| "loss": 0.23645427823066711, |
| "step": 396 |
| }, |
| { |
| "epoch": 2.859205776173285, |
| "grad_norm": 0.5037774443626404, |
| "learning_rate": 0.00019302010065949988, |
| "loss": 0.26774662733078003, |
| "step": 397 |
| }, |
| { |
| "epoch": 2.8664259927797833, |
| "grad_norm": 0.4689895510673523, |
| "learning_rate": 0.0001921993307435037, |
| "loss": 0.3405228853225708, |
| "step": 398 |
| }, |
| { |
| "epoch": 2.8736462093862816, |
| "grad_norm": 0.44282686710357666, |
| "learning_rate": 0.00019137869238274093, |
| "loss": 0.26063668727874756, |
| "step": 399 |
| }, |
| { |
| "epoch": 2.88086642599278, |
| "grad_norm": 0.4379267990589142, |
| "learning_rate": 0.0001905581994169562, |
| "loss": 0.26478758454322815, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.888086642599278, |
| "grad_norm": 0.519749641418457, |
| "learning_rate": 0.00018973786568344205, |
| "loss": 0.31215131282806396, |
| "step": 401 |
| }, |
| { |
| "epoch": 2.895306859205776, |
| "grad_norm": 0.4515770375728607, |
| "learning_rate": 0.000188917705016806, |
| "loss": 0.2881377339363098, |
| "step": 402 |
| }, |
| { |
| "epoch": 2.9025270758122743, |
| "grad_norm": 0.4650344252586365, |
| "learning_rate": 0.0001880977312487365, |
| "loss": 0.2557958960533142, |
| "step": 403 |
| }, |
| { |
| "epoch": 2.9097472924187726, |
| "grad_norm": 0.4621173143386841, |
| "learning_rate": 0.0001872779582077702, |
| "loss": 0.23401126265525818, |
| "step": 404 |
| }, |
| { |
| "epoch": 2.916967509025271, |
| "grad_norm": 0.4094926118850708, |
| "learning_rate": 0.00018645839971905852, |
| "loss": 0.24556361138820648, |
| "step": 405 |
| }, |
| { |
| "epoch": 2.9241877256317688, |
| "grad_norm": 0.48592710494995117, |
| "learning_rate": 0.0001856390696041345, |
| "loss": 0.27918654680252075, |
| "step": 406 |
| }, |
| { |
| "epoch": 2.931407942238267, |
| "grad_norm": 0.4039171040058136, |
| "learning_rate": 0.00018481998168067983, |
| "loss": 0.22918914258480072, |
| "step": 407 |
| }, |
| { |
| "epoch": 2.9386281588447654, |
| "grad_norm": 0.5521522760391235, |
| "learning_rate": 0.0001840011497622917, |
| "loss": 0.35596632957458496, |
| "step": 408 |
| }, |
| { |
| "epoch": 2.9458483754512637, |
| "grad_norm": 0.5527123212814331, |
| "learning_rate": 0.0001831825876582498, |
| "loss": 0.25526192784309387, |
| "step": 409 |
| }, |
| { |
| "epoch": 2.953068592057762, |
| "grad_norm": 0.4386238157749176, |
| "learning_rate": 0.0001823643091732837, |
| "loss": 0.2627878189086914, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.96028880866426, |
| "grad_norm": 0.48318973183631897, |
| "learning_rate": 0.00018154632810733962, |
| "loss": 0.29408901929855347, |
| "step": 411 |
| }, |
| { |
| "epoch": 2.967509025270758, |
| "grad_norm": 0.5193997025489807, |
| "learning_rate": 0.00018072865825534805, |
| "loss": 0.2762555480003357, |
| "step": 412 |
| }, |
| { |
| "epoch": 2.9747292418772564, |
| "grad_norm": 0.488490492105484, |
| "learning_rate": 0.000179911313406991, |
| "loss": 0.3119064271450043, |
| "step": 413 |
| }, |
| { |
| "epoch": 2.9819494584837543, |
| "grad_norm": 0.4461340010166168, |
| "learning_rate": 0.00017909430734646935, |
| "loss": 0.2738839089870453, |
| "step": 414 |
| }, |
| { |
| "epoch": 2.9891696750902526, |
| "grad_norm": 0.4488433003425598, |
| "learning_rate": 0.0001782776538522704, |
| "loss": 0.25788843631744385, |
| "step": 415 |
| }, |
| { |
| "epoch": 2.996389891696751, |
| "grad_norm": 0.5179741382598877, |
| "learning_rate": 0.0001774613666969358, |
| "loss": 0.2556946575641632, |
| "step": 416 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.6138741374015808, |
| "learning_rate": 0.00017664545964682878, |
| "loss": 0.23274531960487366, |
| "step": 417 |
| }, |
| { |
| "epoch": 3.0072202166064983, |
| "grad_norm": 0.27745357155799866, |
| "learning_rate": 0.00017582994646190233, |
| "loss": 0.14754480123519897, |
| "step": 418 |
| }, |
| { |
| "epoch": 3.0144404332129966, |
| "grad_norm": 0.40759217739105225, |
| "learning_rate": 0.00017501484089546708, |
| "loss": 0.21522817015647888, |
| "step": 419 |
| }, |
| { |
| "epoch": 3.0216606498194944, |
| "grad_norm": 0.3123788833618164, |
| "learning_rate": 0.00017420015669395938, |
| "loss": 0.15608534216880798, |
| "step": 420 |
| }, |
| { |
| "epoch": 3.0288808664259927, |
| "grad_norm": 0.4599035978317261, |
| "learning_rate": 0.00017338590759670934, |
| "loss": 0.1309528797864914, |
| "step": 421 |
| }, |
| { |
| "epoch": 3.036101083032491, |
| "grad_norm": 0.38352325558662415, |
| "learning_rate": 0.00017257210733570916, |
| "loss": 0.1724625676870346, |
| "step": 422 |
| }, |
| { |
| "epoch": 3.0433212996389893, |
| "grad_norm": 0.44771838188171387, |
| "learning_rate": 0.00017175876963538178, |
| "loss": 0.1661359965801239, |
| "step": 423 |
| }, |
| { |
| "epoch": 3.050541516245487, |
| "grad_norm": 0.5998123288154602, |
| "learning_rate": 0.00017094590821234906, |
| "loss": 0.1318468153476715, |
| "step": 424 |
| }, |
| { |
| "epoch": 3.0577617328519855, |
| "grad_norm": 0.47632187604904175, |
| "learning_rate": 0.00017013353677520064, |
| "loss": 0.10464094579219818, |
| "step": 425 |
| }, |
| { |
| "epoch": 3.064981949458484, |
| "grad_norm": 0.3745962083339691, |
| "learning_rate": 0.00016932166902426288, |
| "loss": 0.10677687078714371, |
| "step": 426 |
| }, |
| { |
| "epoch": 3.072202166064982, |
| "grad_norm": 0.6340925693511963, |
| "learning_rate": 0.00016851031865136763, |
| "loss": 0.1600916087627411, |
| "step": 427 |
| }, |
| { |
| "epoch": 3.07942238267148, |
| "grad_norm": 0.8183390498161316, |
| "learning_rate": 0.00016769949933962127, |
| "loss": 0.13513822853565216, |
| "step": 428 |
| }, |
| { |
| "epoch": 3.0866425992779782, |
| "grad_norm": 0.8068543076515198, |
| "learning_rate": 0.00016688922476317398, |
| "loss": 0.14319300651550293, |
| "step": 429 |
| }, |
| { |
| "epoch": 3.0938628158844765, |
| "grad_norm": 0.6381204128265381, |
| "learning_rate": 0.00016607950858698946, |
| "loss": 0.1376371830701828, |
| "step": 430 |
| }, |
| { |
| "epoch": 3.101083032490975, |
| "grad_norm": 0.5576755404472351, |
| "learning_rate": 0.00016527036446661395, |
| "loss": 0.14407199621200562, |
| "step": 431 |
| }, |
| { |
| "epoch": 3.108303249097473, |
| "grad_norm": 0.6089445948600769, |
| "learning_rate": 0.00016446180604794627, |
| "loss": 0.1575869917869568, |
| "step": 432 |
| }, |
| { |
| "epoch": 3.115523465703971, |
| "grad_norm": 0.6250684857368469, |
| "learning_rate": 0.0001636538469670077, |
| "loss": 0.17905279994010925, |
| "step": 433 |
| }, |
| { |
| "epoch": 3.1227436823104693, |
| "grad_norm": 0.4243159592151642, |
| "learning_rate": 0.00016284650084971185, |
| "loss": 0.12401723116636276, |
| "step": 434 |
| }, |
| { |
| "epoch": 3.1299638989169676, |
| "grad_norm": 0.4369412362575531, |
| "learning_rate": 0.00016203978131163485, |
| "loss": 0.11108125746250153, |
| "step": 435 |
| }, |
| { |
| "epoch": 3.137184115523466, |
| "grad_norm": 0.45113661885261536, |
| "learning_rate": 0.000161233701957786, |
| "loss": 0.1364113986492157, |
| "step": 436 |
| }, |
| { |
| "epoch": 3.1444043321299637, |
| "grad_norm": 0.33899277448654175, |
| "learning_rate": 0.00016042827638237806, |
| "loss": 0.1253184676170349, |
| "step": 437 |
| }, |
| { |
| "epoch": 3.151624548736462, |
| "grad_norm": 0.48023247718811035, |
| "learning_rate": 0.000159623518168598, |
| "loss": 0.1729699671268463, |
| "step": 438 |
| }, |
| { |
| "epoch": 3.1588447653429603, |
| "grad_norm": 0.4260300397872925, |
| "learning_rate": 0.00015881944088837804, |
| "loss": 0.1630483865737915, |
| "step": 439 |
| }, |
| { |
| "epoch": 3.1660649819494586, |
| "grad_norm": 0.34896689653396606, |
| "learning_rate": 0.00015801605810216685, |
| "loss": 0.14477582275867462, |
| "step": 440 |
| }, |
| { |
| "epoch": 3.1732851985559565, |
| "grad_norm": 0.46189841628074646, |
| "learning_rate": 0.00015721338335870057, |
| "loss": 0.16415920853614807, |
| "step": 441 |
| }, |
| { |
| "epoch": 3.1805054151624548, |
| "grad_norm": 0.40856343507766724, |
| "learning_rate": 0.00015641143019477446, |
| "loss": 0.12028312683105469, |
| "step": 442 |
| }, |
| { |
| "epoch": 3.187725631768953, |
| "grad_norm": 0.36561018228530884, |
| "learning_rate": 0.00015561021213501483, |
| "loss": 0.12599362432956696, |
| "step": 443 |
| }, |
| { |
| "epoch": 3.1949458483754514, |
| "grad_norm": 0.4341471493244171, |
| "learning_rate": 0.0001548097426916505, |
| "loss": 0.14446969330310822, |
| "step": 444 |
| }, |
| { |
| "epoch": 3.2021660649819497, |
| "grad_norm": 0.3864421844482422, |
| "learning_rate": 0.00015401003536428534, |
| "loss": 0.11642799526453018, |
| "step": 445 |
| }, |
| { |
| "epoch": 3.2093862815884475, |
| "grad_norm": 0.5154263973236084, |
| "learning_rate": 0.00015321110363967035, |
| "loss": 0.17028403282165527, |
| "step": 446 |
| }, |
| { |
| "epoch": 3.216606498194946, |
| "grad_norm": 0.4077145755290985, |
| "learning_rate": 0.0001524129609914763, |
| "loss": 0.11544163525104523, |
| "step": 447 |
| }, |
| { |
| "epoch": 3.223826714801444, |
| "grad_norm": 0.41609570384025574, |
| "learning_rate": 0.00015161562088006648, |
| "loss": 0.1203891783952713, |
| "step": 448 |
| }, |
| { |
| "epoch": 3.2310469314079424, |
| "grad_norm": 0.430624783039093, |
| "learning_rate": 0.00015081909675226975, |
| "loss": 0.09192588925361633, |
| "step": 449 |
| }, |
| { |
| "epoch": 3.2382671480144403, |
| "grad_norm": 0.5538221001625061, |
| "learning_rate": 0.0001500234020411538, |
| "loss": 0.16532525420188904, |
| "step": 450 |
| }, |
| { |
| "epoch": 3.2454873646209386, |
| "grad_norm": 0.6360041499137878, |
| "learning_rate": 0.00014922855016579845, |
| "loss": 0.13534685969352722, |
| "step": 451 |
| }, |
| { |
| "epoch": 3.252707581227437, |
| "grad_norm": 0.556538462638855, |
| "learning_rate": 0.00014843455453106925, |
| "loss": 0.16164641082286835, |
| "step": 452 |
| }, |
| { |
| "epoch": 3.259927797833935, |
| "grad_norm": 0.8166404366493225, |
| "learning_rate": 0.00014764142852739193, |
| "loss": 0.17865729331970215, |
| "step": 453 |
| }, |
| { |
| "epoch": 3.2671480144404335, |
| "grad_norm": 0.6231580972671509, |
| "learning_rate": 0.00014684918553052603, |
| "loss": 0.11281149089336395, |
| "step": 454 |
| }, |
| { |
| "epoch": 3.2743682310469313, |
| "grad_norm": 0.5884938836097717, |
| "learning_rate": 0.00014605783890133948, |
| "loss": 0.15221013128757477, |
| "step": 455 |
| }, |
| { |
| "epoch": 3.2815884476534296, |
| "grad_norm": 0.40336933732032776, |
| "learning_rate": 0.00014526740198558344, |
| "loss": 0.08725619316101074, |
| "step": 456 |
| }, |
| { |
| "epoch": 3.288808664259928, |
| "grad_norm": 0.7082973718643188, |
| "learning_rate": 0.0001444778881136671, |
| "loss": 0.1795981079339981, |
| "step": 457 |
| }, |
| { |
| "epoch": 3.2960288808664258, |
| "grad_norm": 0.44312670826911926, |
| "learning_rate": 0.0001436893106004328, |
| "loss": 0.11532506346702576, |
| "step": 458 |
| }, |
| { |
| "epoch": 3.303249097472924, |
| "grad_norm": 0.4892312288284302, |
| "learning_rate": 0.0001429016827449316, |
| "loss": 0.15218153595924377, |
| "step": 459 |
| }, |
| { |
| "epoch": 3.3104693140794224, |
| "grad_norm": 0.36000511050224304, |
| "learning_rate": 0.000142115017830199, |
| "loss": 0.1174839586019516, |
| "step": 460 |
| }, |
| { |
| "epoch": 3.3176895306859207, |
| "grad_norm": 0.42502933740615845, |
| "learning_rate": 0.00014132932912303082, |
| "loss": 0.1354883909225464, |
| "step": 461 |
| }, |
| { |
| "epoch": 3.324909747292419, |
| "grad_norm": 0.5004318356513977, |
| "learning_rate": 0.00014054462987375947, |
| "loss": 0.14879979193210602, |
| "step": 462 |
| }, |
| { |
| "epoch": 3.332129963898917, |
| "grad_norm": 0.5428838729858398, |
| "learning_rate": 0.00013976093331603072, |
| "loss": 0.1601594090461731, |
| "step": 463 |
| }, |
| { |
| "epoch": 3.339350180505415, |
| "grad_norm": 0.5523360967636108, |
| "learning_rate": 0.00013897825266658015, |
| "loss": 0.1349392980337143, |
| "step": 464 |
| }, |
| { |
| "epoch": 3.3465703971119134, |
| "grad_norm": 0.4672771394252777, |
| "learning_rate": 0.00013819660112501054, |
| "loss": 0.1563996523618698, |
| "step": 465 |
| }, |
| { |
| "epoch": 3.3537906137184117, |
| "grad_norm": 0.4747507870197296, |
| "learning_rate": 0.00013741599187356917, |
| "loss": 0.12792187929153442, |
| "step": 466 |
| }, |
| { |
| "epoch": 3.3610108303249095, |
| "grad_norm": 0.4293171167373657, |
| "learning_rate": 0.00013663643807692547, |
| "loss": 0.13704359531402588, |
| "step": 467 |
| }, |
| { |
| "epoch": 3.368231046931408, |
| "grad_norm": 0.406646728515625, |
| "learning_rate": 0.00013585795288194902, |
| "loss": 0.14688344299793243, |
| "step": 468 |
| }, |
| { |
| "epoch": 3.375451263537906, |
| "grad_norm": 0.44870486855506897, |
| "learning_rate": 0.00013508054941748792, |
| "loss": 0.11994664371013641, |
| "step": 469 |
| }, |
| { |
| "epoch": 3.3826714801444044, |
| "grad_norm": 0.46383994817733765, |
| "learning_rate": 0.00013430424079414728, |
| "loss": 0.13228918612003326, |
| "step": 470 |
| }, |
| { |
| "epoch": 3.3898916967509027, |
| "grad_norm": 0.873016893863678, |
| "learning_rate": 0.0001335290401040681, |
| "loss": 0.14495904743671417, |
| "step": 471 |
| }, |
| { |
| "epoch": 3.3971119133574006, |
| "grad_norm": 0.5575613975524902, |
| "learning_rate": 0.0001327549604207066, |
| "loss": 0.17171433568000793, |
| "step": 472 |
| }, |
| { |
| "epoch": 3.404332129963899, |
| "grad_norm": 0.541860818862915, |
| "learning_rate": 0.00013198201479861353, |
| "loss": 0.14465925097465515, |
| "step": 473 |
| }, |
| { |
| "epoch": 3.411552346570397, |
| "grad_norm": 0.5210021734237671, |
| "learning_rate": 0.00013121021627321438, |
| "loss": 0.15102122724056244, |
| "step": 474 |
| }, |
| { |
| "epoch": 3.4187725631768955, |
| "grad_norm": 0.46373456716537476, |
| "learning_rate": 0.00013043957786058904, |
| "loss": 0.15518702566623688, |
| "step": 475 |
| }, |
| { |
| "epoch": 3.4259927797833933, |
| "grad_norm": 0.48513785004615784, |
| "learning_rate": 0.00012967011255725263, |
| "loss": 0.14110815525054932, |
| "step": 476 |
| }, |
| { |
| "epoch": 3.4332129963898916, |
| "grad_norm": 0.5956123471260071, |
| "learning_rate": 0.0001289018333399364, |
| "loss": 0.14913466572761536, |
| "step": 477 |
| }, |
| { |
| "epoch": 3.44043321299639, |
| "grad_norm": 0.5387746095657349, |
| "learning_rate": 0.0001281347531653685, |
| "loss": 0.14882197976112366, |
| "step": 478 |
| }, |
| { |
| "epoch": 3.4476534296028882, |
| "grad_norm": 0.43160125613212585, |
| "learning_rate": 0.00012736888497005578, |
| "loss": 0.13204768300056458, |
| "step": 479 |
| }, |
| { |
| "epoch": 3.4548736462093865, |
| "grad_norm": 0.41130170226097107, |
| "learning_rate": 0.0001266042416700656, |
| "loss": 0.13427188992500305, |
| "step": 480 |
| }, |
| { |
| "epoch": 3.4620938628158844, |
| "grad_norm": 0.5126029253005981, |
| "learning_rate": 0.00012584083616080786, |
| "loss": 0.12719954550266266, |
| "step": 481 |
| }, |
| { |
| "epoch": 3.4693140794223827, |
| "grad_norm": 0.4994872212409973, |
| "learning_rate": 0.0001250786813168176, |
| "loss": 0.15062202513217926, |
| "step": 482 |
| }, |
| { |
| "epoch": 3.476534296028881, |
| "grad_norm": 0.4697980284690857, |
| "learning_rate": 0.00012431778999153796, |
| "loss": 0.14715459942817688, |
| "step": 483 |
| }, |
| { |
| "epoch": 3.483754512635379, |
| "grad_norm": 0.5356115102767944, |
| "learning_rate": 0.00012355817501710328, |
| "loss": 0.12667298316955566, |
| "step": 484 |
| }, |
| { |
| "epoch": 3.490974729241877, |
| "grad_norm": 0.48629310727119446, |
| "learning_rate": 0.00012279984920412264, |
| "loss": 0.12396648526191711, |
| "step": 485 |
| }, |
| { |
| "epoch": 3.4981949458483754, |
| "grad_norm": 0.6295841336250305, |
| "learning_rate": 0.00012204282534146414, |
| "loss": 0.1579296588897705, |
| "step": 486 |
| }, |
| { |
| "epoch": 3.5054151624548737, |
| "grad_norm": 0.524736225605011, |
| "learning_rate": 0.00012128711619603885, |
| "loss": 0.14176616072654724, |
| "step": 487 |
| }, |
| { |
| "epoch": 3.512635379061372, |
| "grad_norm": 0.4429115355014801, |
| "learning_rate": 0.0001205327345125856, |
| "loss": 0.16075468063354492, |
| "step": 488 |
| }, |
| { |
| "epoch": 3.51985559566787, |
| "grad_norm": 0.5202926993370056, |
| "learning_rate": 0.00011977969301345627, |
| "loss": 0.155876025557518, |
| "step": 489 |
| }, |
| { |
| "epoch": 3.527075812274368, |
| "grad_norm": 0.5090999603271484, |
| "learning_rate": 0.00011902800439840091, |
| "loss": 0.1420796513557434, |
| "step": 490 |
| }, |
| { |
| "epoch": 3.5342960288808665, |
| "grad_norm": 0.5266745686531067, |
| "learning_rate": 0.0001182776813443537, |
| "loss": 0.1911281943321228, |
| "step": 491 |
| }, |
| { |
| "epoch": 3.5415162454873648, |
| "grad_norm": 0.4100717306137085, |
| "learning_rate": 0.00011752873650521934, |
| "loss": 0.12646690011024475, |
| "step": 492 |
| }, |
| { |
| "epoch": 3.5487364620938626, |
| "grad_norm": 0.4897909164428711, |
| "learning_rate": 0.00011678118251165929, |
| "loss": 0.14893396198749542, |
| "step": 493 |
| }, |
| { |
| "epoch": 3.555956678700361, |
| "grad_norm": 0.48633724451065063, |
| "learning_rate": 0.00011603503197087893, |
| "loss": 0.11430861800909042, |
| "step": 494 |
| }, |
| { |
| "epoch": 3.563176895306859, |
| "grad_norm": 0.3797968327999115, |
| "learning_rate": 0.00011529029746641522, |
| "loss": 0.10336866229772568, |
| "step": 495 |
| }, |
| { |
| "epoch": 3.5703971119133575, |
| "grad_norm": 0.4714502692222595, |
| "learning_rate": 0.00011454699155792387, |
| "loss": 0.12609894573688507, |
| "step": 496 |
| }, |
| { |
| "epoch": 3.577617328519856, |
| "grad_norm": 0.5311459898948669, |
| "learning_rate": 0.00011380512678096805, |
| "loss": 0.14762037992477417, |
| "step": 497 |
| }, |
| { |
| "epoch": 3.5848375451263537, |
| "grad_norm": 0.44842103123664856, |
| "learning_rate": 0.00011306471564680703, |
| "loss": 0.1137317568063736, |
| "step": 498 |
| }, |
| { |
| "epoch": 3.592057761732852, |
| "grad_norm": 0.6083047986030579, |
| "learning_rate": 0.0001123257706421845, |
| "loss": 0.1538015455007553, |
| "step": 499 |
| }, |
| { |
| "epoch": 3.5992779783393503, |
| "grad_norm": 0.454277902841568, |
| "learning_rate": 0.00011158830422911882, |
| "loss": 0.13900214433670044, |
| "step": 500 |
| }, |
| { |
| "epoch": 3.606498194945848, |
| "grad_norm": 0.46477752923965454, |
| "learning_rate": 0.00011085232884469236, |
| "loss": 0.1317400485277176, |
| "step": 501 |
| }, |
| { |
| "epoch": 3.6137184115523464, |
| "grad_norm": 0.5491266250610352, |
| "learning_rate": 0.00011011785690084178, |
| "loss": 0.20672887563705444, |
| "step": 502 |
| }, |
| { |
| "epoch": 3.6209386281588447, |
| "grad_norm": 0.48666200041770935, |
| "learning_rate": 0.00010938490078414902, |
| "loss": 0.15307360887527466, |
| "step": 503 |
| }, |
| { |
| "epoch": 3.628158844765343, |
| "grad_norm": 0.557000458240509, |
| "learning_rate": 0.00010865347285563189, |
| "loss": 0.18239088356494904, |
| "step": 504 |
| }, |
| { |
| "epoch": 3.6353790613718413, |
| "grad_norm": 0.45608651638031006, |
| "learning_rate": 0.0001079235854505362, |
| "loss": 0.15936806797981262, |
| "step": 505 |
| }, |
| { |
| "epoch": 3.6425992779783396, |
| "grad_norm": 0.455575555562973, |
| "learning_rate": 0.00010719525087812736, |
| "loss": 0.12316213548183441, |
| "step": 506 |
| }, |
| { |
| "epoch": 3.6498194945848375, |
| "grad_norm": 0.48413556814193726, |
| "learning_rate": 0.00010646848142148267, |
| "loss": 0.13552163541316986, |
| "step": 507 |
| }, |
| { |
| "epoch": 3.6570397111913358, |
| "grad_norm": 0.31646862626075745, |
| "learning_rate": 0.0001057432893372846, |
| "loss": 0.09000004827976227, |
| "step": 508 |
| }, |
| { |
| "epoch": 3.664259927797834, |
| "grad_norm": 0.43280190229415894, |
| "learning_rate": 0.00010501968685561379, |
| "loss": 0.13823340833187103, |
| "step": 509 |
| }, |
| { |
| "epoch": 3.671480144404332, |
| "grad_norm": 0.48566341400146484, |
| "learning_rate": 0.00010429768617974271, |
| "loss": 0.1494002640247345, |
| "step": 510 |
| }, |
| { |
| "epoch": 3.67870036101083, |
| "grad_norm": 0.5894930362701416, |
| "learning_rate": 0.00010357729948593022, |
| "loss": 0.16500279307365417, |
| "step": 511 |
| }, |
| { |
| "epoch": 3.6859205776173285, |
| "grad_norm": 0.4345530867576599, |
| "learning_rate": 0.00010285853892321597, |
| "loss": 0.15392449498176575, |
| "step": 512 |
| }, |
| { |
| "epoch": 3.693140794223827, |
| "grad_norm": 0.38582298159599304, |
| "learning_rate": 0.0001021414166132153, |
| "loss": 0.11830861121416092, |
| "step": 513 |
| }, |
| { |
| "epoch": 3.700361010830325, |
| "grad_norm": 0.5571300983428955, |
| "learning_rate": 0.00010142594464991538, |
| "loss": 0.17028063535690308, |
| "step": 514 |
| }, |
| { |
| "epoch": 3.707581227436823, |
| "grad_norm": 0.442558616399765, |
| "learning_rate": 0.00010071213509947078, |
| "loss": 0.13233450055122375, |
| "step": 515 |
| }, |
| { |
| "epoch": 3.7148014440433212, |
| "grad_norm": 0.38991135358810425, |
| "learning_rate": 0.00010000000000000005, |
| "loss": 0.11185239255428314, |
| "step": 516 |
| }, |
| { |
| "epoch": 3.7220216606498195, |
| "grad_norm": 0.46868595480918884, |
| "learning_rate": 9.928955136138292e-05, |
| "loss": 0.13171209394931793, |
| "step": 517 |
| }, |
| { |
| "epoch": 3.729241877256318, |
| "grad_norm": 0.3938763439655304, |
| "learning_rate": 9.858080116505763e-05, |
| "loss": 0.12150520086288452, |
| "step": 518 |
| }, |
| { |
| "epoch": 3.7364620938628157, |
| "grad_norm": 0.5428814888000488, |
| "learning_rate": 9.787376136381865e-05, |
| "loss": 0.17324000597000122, |
| "step": 519 |
| }, |
| { |
| "epoch": 3.743682310469314, |
| "grad_norm": 0.5528351664543152, |
| "learning_rate": 9.716844388161555e-05, |
| "loss": 0.13830417394638062, |
| "step": 520 |
| }, |
| { |
| "epoch": 3.7509025270758123, |
| "grad_norm": 0.43436095118522644, |
| "learning_rate": 9.646486061335151e-05, |
| "loss": 0.12715619802474976, |
| "step": 521 |
| }, |
| { |
| "epoch": 3.7581227436823106, |
| "grad_norm": 0.5142949223518372, |
| "learning_rate": 9.576302342468297e-05, |
| "loss": 0.13084562122821808, |
| "step": 522 |
| }, |
| { |
| "epoch": 3.765342960288809, |
| "grad_norm": 0.6056551933288574, |
| "learning_rate": 9.506294415181942e-05, |
| "loss": 0.14757800102233887, |
| "step": 523 |
| }, |
| { |
| "epoch": 3.7725631768953067, |
| "grad_norm": 0.7759309411048889, |
| "learning_rate": 9.436463460132359e-05, |
| "loss": 0.14157943427562714, |
| "step": 524 |
| }, |
| { |
| "epoch": 3.779783393501805, |
| "grad_norm": 0.5732694268226624, |
| "learning_rate": 9.366810654991276e-05, |
| "loss": 0.12228554487228394, |
| "step": 525 |
| }, |
| { |
| "epoch": 3.7870036101083033, |
| "grad_norm": 0.5100772976875305, |
| "learning_rate": 9.297337174425995e-05, |
| "loss": 0.13228444755077362, |
| "step": 526 |
| }, |
| { |
| "epoch": 3.794223826714801, |
| "grad_norm": 0.3461199104785919, |
| "learning_rate": 9.228044190079553e-05, |
| "loss": 0.11099657416343689, |
| "step": 527 |
| }, |
| { |
| "epoch": 3.8014440433212995, |
| "grad_norm": 0.5344772338867188, |
| "learning_rate": 9.158932870551011e-05, |
| "loss": 0.1298564374446869, |
| "step": 528 |
| }, |
| { |
| "epoch": 3.808664259927798, |
| "grad_norm": 0.5982236266136169, |
| "learning_rate": 9.09000438137573e-05, |
| "loss": 0.13814345002174377, |
| "step": 529 |
| }, |
| { |
| "epoch": 3.815884476534296, |
| "grad_norm": 0.540401816368103, |
| "learning_rate": 9.021259885005678e-05, |
| "loss": 0.15752826631069183, |
| "step": 530 |
| }, |
| { |
| "epoch": 3.8231046931407944, |
| "grad_norm": 0.5778837203979492, |
| "learning_rate": 8.952700540789885e-05, |
| "loss": 0.11452727019786835, |
| "step": 531 |
| }, |
| { |
| "epoch": 3.8303249097472927, |
| "grad_norm": 0.5168759226799011, |
| "learning_rate": 8.884327504954857e-05, |
| "loss": 0.18627649545669556, |
| "step": 532 |
| }, |
| { |
| "epoch": 3.8375451263537905, |
| "grad_norm": 0.5951539278030396, |
| "learning_rate": 8.816141930585067e-05, |
| "loss": 0.17812731862068176, |
| "step": 533 |
| }, |
| { |
| "epoch": 3.844765342960289, |
| "grad_norm": 0.46762675046920776, |
| "learning_rate": 8.748144967603538e-05, |
| "loss": 0.1432511955499649, |
| "step": 534 |
| }, |
| { |
| "epoch": 3.851985559566787, |
| "grad_norm": 0.4484635889530182, |
| "learning_rate": 8.680337762752444e-05, |
| "loss": 0.13658685982227325, |
| "step": 535 |
| }, |
| { |
| "epoch": 3.859205776173285, |
| "grad_norm": 0.518476128578186, |
| "learning_rate": 8.612721459573734e-05, |
| "loss": 0.14582251012325287, |
| "step": 536 |
| }, |
| { |
| "epoch": 3.8664259927797833, |
| "grad_norm": 0.5416932106018066, |
| "learning_rate": 8.545297198389896e-05, |
| "loss": 0.15608924627304077, |
| "step": 537 |
| }, |
| { |
| "epoch": 3.8736462093862816, |
| "grad_norm": 0.502341628074646, |
| "learning_rate": 8.478066116284713e-05, |
| "loss": 0.14149951934814453, |
| "step": 538 |
| }, |
| { |
| "epoch": 3.88086642599278, |
| "grad_norm": 0.3645715117454529, |
| "learning_rate": 8.411029347084049e-05, |
| "loss": 0.1299845278263092, |
| "step": 539 |
| }, |
| { |
| "epoch": 3.888086642599278, |
| "grad_norm": 0.4532802700996399, |
| "learning_rate": 8.344188021336783e-05, |
| "loss": 0.11957640945911407, |
| "step": 540 |
| }, |
| { |
| "epoch": 3.895306859205776, |
| "grad_norm": 0.39594075083732605, |
| "learning_rate": 8.277543266295713e-05, |
| "loss": 0.09765089303255081, |
| "step": 541 |
| }, |
| { |
| "epoch": 3.9025270758122743, |
| "grad_norm": 0.4911992847919464, |
| "learning_rate": 8.211096205898529e-05, |
| "loss": 0.13661810755729675, |
| "step": 542 |
| }, |
| { |
| "epoch": 3.9097472924187726, |
| "grad_norm": 0.41524603962898254, |
| "learning_rate": 8.144847960748904e-05, |
| "loss": 0.11461715400218964, |
| "step": 543 |
| }, |
| { |
| "epoch": 3.916967509025271, |
| "grad_norm": 0.6111493110656738, |
| "learning_rate": 8.078799648097555e-05, |
| "loss": 0.14272424578666687, |
| "step": 544 |
| }, |
| { |
| "epoch": 3.9241877256317688, |
| "grad_norm": 0.6367743611335754, |
| "learning_rate": 8.01295238182342e-05, |
| "loss": 0.15256257355213165, |
| "step": 545 |
| }, |
| { |
| "epoch": 3.931407942238267, |
| "grad_norm": 0.5055350065231323, |
| "learning_rate": 7.947307272414874e-05, |
| "loss": 0.12612725794315338, |
| "step": 546 |
| }, |
| { |
| "epoch": 3.9386281588447654, |
| "grad_norm": 0.6354690790176392, |
| "learning_rate": 7.881865426950979e-05, |
| "loss": 0.18444940447807312, |
| "step": 547 |
| }, |
| { |
| "epoch": 3.9458483754512637, |
| "grad_norm": 0.48749735951423645, |
| "learning_rate": 7.816627949082844e-05, |
| "loss": 0.14657062292099, |
| "step": 548 |
| }, |
| { |
| "epoch": 3.953068592057762, |
| "grad_norm": 0.5144006013870239, |
| "learning_rate": 7.751595939015004e-05, |
| "loss": 0.13369342684745789, |
| "step": 549 |
| }, |
| { |
| "epoch": 3.96028880866426, |
| "grad_norm": 0.6086586117744446, |
| "learning_rate": 7.686770493486834e-05, |
| "loss": 0.12459664046764374, |
| "step": 550 |
| }, |
| { |
| "epoch": 3.967509025270758, |
| "grad_norm": 0.3899252712726593, |
| "learning_rate": 7.622152705754109e-05, |
| "loss": 0.09838660806417465, |
| "step": 551 |
| }, |
| { |
| "epoch": 3.9747292418772564, |
| "grad_norm": 0.5426426529884338, |
| "learning_rate": 7.557743665570524e-05, |
| "loss": 0.12174687534570694, |
| "step": 552 |
| }, |
| { |
| "epoch": 3.9819494584837543, |
| "grad_norm": 0.4566539227962494, |
| "learning_rate": 7.49354445916932e-05, |
| "loss": 0.12463852018117905, |
| "step": 553 |
| }, |
| { |
| "epoch": 3.9891696750902526, |
| "grad_norm": 0.4546673595905304, |
| "learning_rate": 7.429556169244984e-05, |
| "loss": 0.145570307970047, |
| "step": 554 |
| }, |
| { |
| "epoch": 3.996389891696751, |
| "grad_norm": 0.4223948121070862, |
| "learning_rate": 7.365779874934985e-05, |
| "loss": 0.1327395737171173, |
| "step": 555 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.9028692245483398, |
| "learning_rate": 7.302216651801546e-05, |
| "loss": 0.1679631471633911, |
| "step": 556 |
| }, |
| { |
| "epoch": 4.007220216606498, |
| "grad_norm": 0.2356356829404831, |
| "learning_rate": 7.23886757181355e-05, |
| "loss": 0.07719021290540695, |
| "step": 557 |
| }, |
| { |
| "epoch": 4.014440433212997, |
| "grad_norm": 0.2547387480735779, |
| "learning_rate": 7.175733703328438e-05, |
| "loss": 0.08970507979393005, |
| "step": 558 |
| }, |
| { |
| "epoch": 4.021660649819495, |
| "grad_norm": 0.2796124815940857, |
| "learning_rate": 7.112816111074175e-05, |
| "loss": 0.08122967183589935, |
| "step": 559 |
| }, |
| { |
| "epoch": 4.028880866425993, |
| "grad_norm": 0.3060125410556793, |
| "learning_rate": 7.050115856131327e-05, |
| "loss": 0.10071869194507599, |
| "step": 560 |
| }, |
| { |
| "epoch": 4.036101083032491, |
| "grad_norm": 0.23872718214988708, |
| "learning_rate": 6.987633995915164e-05, |
| "loss": 0.07773898541927338, |
| "step": 561 |
| }, |
| { |
| "epoch": 4.043321299638989, |
| "grad_norm": 0.2474278211593628, |
| "learning_rate": 6.925371584157776e-05, |
| "loss": 0.08390785753726959, |
| "step": 562 |
| }, |
| { |
| "epoch": 4.050541516245487, |
| "grad_norm": 0.2561244070529938, |
| "learning_rate": 6.863329670890379e-05, |
| "loss": 0.07564981281757355, |
| "step": 563 |
| }, |
| { |
| "epoch": 4.0577617328519855, |
| "grad_norm": 0.23667839169502258, |
| "learning_rate": 6.801509302425553e-05, |
| "loss": 0.06735668331384659, |
| "step": 564 |
| }, |
| { |
| "epoch": 4.064981949458484, |
| "grad_norm": 0.30244675278663635, |
| "learning_rate": 6.739911521339603e-05, |
| "loss": 0.08246147632598877, |
| "step": 565 |
| }, |
| { |
| "epoch": 4.072202166064982, |
| "grad_norm": 0.322549968957901, |
| "learning_rate": 6.678537366454998e-05, |
| "loss": 0.0768466591835022, |
| "step": 566 |
| }, |
| { |
| "epoch": 4.07942238267148, |
| "grad_norm": 0.3082004487514496, |
| "learning_rate": 6.617387872822842e-05, |
| "loss": 0.07058844715356827, |
| "step": 567 |
| }, |
| { |
| "epoch": 4.086642599277979, |
| "grad_norm": 0.4027414917945862, |
| "learning_rate": 6.556464071705386e-05, |
| "loss": 0.06483431905508041, |
| "step": 568 |
| }, |
| { |
| "epoch": 4.093862815884476, |
| "grad_norm": 0.32241883873939514, |
| "learning_rate": 6.495766990558716e-05, |
| "loss": 0.08380893617868423, |
| "step": 569 |
| }, |
| { |
| "epoch": 4.101083032490974, |
| "grad_norm": 0.6206040978431702, |
| "learning_rate": 6.435297653015319e-05, |
| "loss": 0.08805538713932037, |
| "step": 570 |
| }, |
| { |
| "epoch": 4.108303249097473, |
| "grad_norm": 0.37588536739349365, |
| "learning_rate": 6.375057078866916e-05, |
| "loss": 0.06972259283065796, |
| "step": 571 |
| }, |
| { |
| "epoch": 4.115523465703971, |
| "grad_norm": 0.7320848703384399, |
| "learning_rate": 6.315046284047207e-05, |
| "loss": 0.07265684008598328, |
| "step": 572 |
| }, |
| { |
| "epoch": 4.122743682310469, |
| "grad_norm": 0.6254473328590393, |
| "learning_rate": 6.255266280614747e-05, |
| "loss": 0.06212320178747177, |
| "step": 573 |
| }, |
| { |
| "epoch": 4.129963898916968, |
| "grad_norm": 0.5760471820831299, |
| "learning_rate": 6.195718076735894e-05, |
| "loss": 0.08673934638500214, |
| "step": 574 |
| }, |
| { |
| "epoch": 4.137184115523466, |
| "grad_norm": 0.49967148900032043, |
| "learning_rate": 6.136402676667806e-05, |
| "loss": 0.06496398150920868, |
| "step": 575 |
| }, |
| { |
| "epoch": 4.144404332129964, |
| "grad_norm": 0.43019023537635803, |
| "learning_rate": 6.077321080741469e-05, |
| "loss": 0.06604783982038498, |
| "step": 576 |
| }, |
| { |
| "epoch": 4.1516245487364625, |
| "grad_norm": 0.4466577172279358, |
| "learning_rate": 6.0184742853448796e-05, |
| "loss": 0.06710691004991531, |
| "step": 577 |
| }, |
| { |
| "epoch": 4.15884476534296, |
| "grad_norm": 0.5732907056808472, |
| "learning_rate": 5.959863282906208e-05, |
| "loss": 0.09353934228420258, |
| "step": 578 |
| }, |
| { |
| "epoch": 4.166064981949458, |
| "grad_norm": 0.3326954245567322, |
| "learning_rate": 5.901489061877059e-05, |
| "loss": 0.06768821179866791, |
| "step": 579 |
| }, |
| { |
| "epoch": 4.1732851985559565, |
| "grad_norm": 0.5838833451271057, |
| "learning_rate": 5.843352606715819e-05, |
| "loss": 0.09657922387123108, |
| "step": 580 |
| }, |
| { |
| "epoch": 4.180505415162455, |
| "grad_norm": 0.6080812215805054, |
| "learning_rate": 5.7854548978710586e-05, |
| "loss": 0.07862652093172073, |
| "step": 581 |
| }, |
| { |
| "epoch": 4.187725631768953, |
| "grad_norm": 0.35978272557258606, |
| "learning_rate": 5.7277969117649554e-05, |
| "loss": 0.06750591099262238, |
| "step": 582 |
| }, |
| { |
| "epoch": 4.194945848375451, |
| "grad_norm": 0.5055731534957886, |
| "learning_rate": 5.670379620776882e-05, |
| "loss": 0.07750953733921051, |
| "step": 583 |
| }, |
| { |
| "epoch": 4.20216606498195, |
| "grad_norm": 0.3300122618675232, |
| "learning_rate": 5.613203993226981e-05, |
| "loss": 0.05905657261610031, |
| "step": 584 |
| }, |
| { |
| "epoch": 4.209386281588448, |
| "grad_norm": 0.39663630723953247, |
| "learning_rate": 5.5562709933598156e-05, |
| "loss": 0.06651993840932846, |
| "step": 585 |
| }, |
| { |
| "epoch": 4.216606498194946, |
| "grad_norm": 0.2632470726966858, |
| "learning_rate": 5.4995815813281483e-05, |
| "loss": 0.04662226885557175, |
| "step": 586 |
| }, |
| { |
| "epoch": 4.223826714801444, |
| "grad_norm": 0.3943081796169281, |
| "learning_rate": 5.443136713176724e-05, |
| "loss": 0.055592283606529236, |
| "step": 587 |
| }, |
| { |
| "epoch": 4.231046931407942, |
| "grad_norm": 0.3067891001701355, |
| "learning_rate": 5.386937340826139e-05, |
| "loss": 0.06188460439443588, |
| "step": 588 |
| }, |
| { |
| "epoch": 4.23826714801444, |
| "grad_norm": 0.5765032768249512, |
| "learning_rate": 5.330984412056814e-05, |
| "loss": 0.07393093407154083, |
| "step": 589 |
| }, |
| { |
| "epoch": 4.245487364620939, |
| "grad_norm": 0.3043580949306488, |
| "learning_rate": 5.2752788704929944e-05, |
| "loss": 0.04691813141107559, |
| "step": 590 |
| }, |
| { |
| "epoch": 4.252707581227437, |
| "grad_norm": 0.41853103041648865, |
| "learning_rate": 5.2198216555868206e-05, |
| "loss": 0.08564123511314392, |
| "step": 591 |
| }, |
| { |
| "epoch": 4.259927797833935, |
| "grad_norm": 0.366338849067688, |
| "learning_rate": 5.164613702602519e-05, |
| "loss": 0.06465509533882141, |
| "step": 592 |
| }, |
| { |
| "epoch": 4.2671480144404335, |
| "grad_norm": 0.4903438985347748, |
| "learning_rate": 5.1096559426005994e-05, |
| "loss": 0.0653722956776619, |
| "step": 593 |
| }, |
| { |
| "epoch": 4.274368231046932, |
| "grad_norm": 0.3799297511577606, |
| "learning_rate": 5.054949302422178e-05, |
| "loss": 0.06573019176721573, |
| "step": 594 |
| }, |
| { |
| "epoch": 4.28158844765343, |
| "grad_norm": 0.3962422013282776, |
| "learning_rate": 5.000494704673328e-05, |
| "loss": 0.06330323964357376, |
| "step": 595 |
| }, |
| { |
| "epoch": 4.2888086642599275, |
| "grad_norm": 0.40771204233169556, |
| "learning_rate": 4.946293067709511e-05, |
| "loss": 0.0744054913520813, |
| "step": 596 |
| }, |
| { |
| "epoch": 4.296028880866426, |
| "grad_norm": 0.358303040266037, |
| "learning_rate": 4.892345305620123e-05, |
| "loss": 0.06824557483196259, |
| "step": 597 |
| }, |
| { |
| "epoch": 4.303249097472924, |
| "grad_norm": 0.5681015253067017, |
| "learning_rate": 4.83865232821306e-05, |
| "loss": 0.07922443747520447, |
| "step": 598 |
| }, |
| { |
| "epoch": 4.310469314079422, |
| "grad_norm": 0.40545016527175903, |
| "learning_rate": 4.785215040999354e-05, |
| "loss": 0.06584658473730087, |
| "step": 599 |
| }, |
| { |
| "epoch": 4.317689530685921, |
| "grad_norm": 0.4532371759414673, |
| "learning_rate": 4.73203434517794e-05, |
| "loss": 0.06798739731311798, |
| "step": 600 |
| }, |
| { |
| "epoch": 4.324909747292419, |
| "grad_norm": 0.34823986887931824, |
| "learning_rate": 4.679111137620442e-05, |
| "loss": 0.060036975890398026, |
| "step": 601 |
| }, |
| { |
| "epoch": 4.332129963898917, |
| "grad_norm": 0.7183476686477661, |
| "learning_rate": 4.626446310856027e-05, |
| "loss": 0.09737586230039597, |
| "step": 602 |
| }, |
| { |
| "epoch": 4.3393501805054155, |
| "grad_norm": 0.3646744191646576, |
| "learning_rate": 4.574040753056385e-05, |
| "loss": 0.07068562507629395, |
| "step": 603 |
| }, |
| { |
| "epoch": 4.346570397111913, |
| "grad_norm": 0.7281261682510376, |
| "learning_rate": 4.521895348020744e-05, |
| "loss": 0.0830075815320015, |
| "step": 604 |
| }, |
| { |
| "epoch": 4.353790613718411, |
| "grad_norm": 0.3349117338657379, |
| "learning_rate": 4.470010975160932e-05, |
| "loss": 0.057703591883182526, |
| "step": 605 |
| }, |
| { |
| "epoch": 4.3610108303249095, |
| "grad_norm": 0.4553733766078949, |
| "learning_rate": 4.418388509486591e-05, |
| "loss": 0.07166286557912827, |
| "step": 606 |
| }, |
| { |
| "epoch": 4.368231046931408, |
| "grad_norm": 0.44004717469215393, |
| "learning_rate": 4.3670288215904044e-05, |
| "loss": 0.06402762979269028, |
| "step": 607 |
| }, |
| { |
| "epoch": 4.375451263537906, |
| "grad_norm": 0.4734828472137451, |
| "learning_rate": 4.315932777633391e-05, |
| "loss": 0.06359273195266724, |
| "step": 608 |
| }, |
| { |
| "epoch": 4.382671480144404, |
| "grad_norm": 0.42865970730781555, |
| "learning_rate": 4.265101239330336e-05, |
| "loss": 0.05663272365927696, |
| "step": 609 |
| }, |
| { |
| "epoch": 4.389891696750903, |
| "grad_norm": 0.3049170970916748, |
| "learning_rate": 4.214535063935236e-05, |
| "loss": 0.05514144152402878, |
| "step": 610 |
| }, |
| { |
| "epoch": 4.397111913357401, |
| "grad_norm": 0.5843676328659058, |
| "learning_rate": 4.1642351042268413e-05, |
| "loss": 0.0814075767993927, |
| "step": 611 |
| }, |
| { |
| "epoch": 4.404332129963899, |
| "grad_norm": 0.45971769094467163, |
| "learning_rate": 4.11420220849428e-05, |
| "loss": 0.08407925814390182, |
| "step": 612 |
| }, |
| { |
| "epoch": 4.411552346570397, |
| "grad_norm": 0.3730124235153198, |
| "learning_rate": 4.064437220522759e-05, |
| "loss": 0.08016012609004974, |
| "step": 613 |
| }, |
| { |
| "epoch": 4.418772563176895, |
| "grad_norm": 0.47459709644317627, |
| "learning_rate": 4.0149409795793116e-05, |
| "loss": 0.06777369976043701, |
| "step": 614 |
| }, |
| { |
| "epoch": 4.425992779783393, |
| "grad_norm": 0.43131333589553833, |
| "learning_rate": 3.965714320398668e-05, |
| "loss": 0.06275366246700287, |
| "step": 615 |
| }, |
| { |
| "epoch": 4.433212996389892, |
| "grad_norm": 0.5383766293525696, |
| "learning_rate": 3.9167580731691644e-05, |
| "loss": 0.07567012310028076, |
| "step": 616 |
| }, |
| { |
| "epoch": 4.44043321299639, |
| "grad_norm": 0.5049149990081787, |
| "learning_rate": 3.868073063518749e-05, |
| "loss": 0.07030000537633896, |
| "step": 617 |
| }, |
| { |
| "epoch": 4.447653429602888, |
| "grad_norm": 0.4588138461112976, |
| "learning_rate": 3.819660112501053e-05, |
| "loss": 0.06413187086582184, |
| "step": 618 |
| }, |
| { |
| "epoch": 4.4548736462093865, |
| "grad_norm": 0.5270079374313354, |
| "learning_rate": 3.771520036581535e-05, |
| "loss": 0.08776699006557465, |
| "step": 619 |
| }, |
| { |
| "epoch": 4.462093862815885, |
| "grad_norm": 0.4434908330440521, |
| "learning_rate": 3.723653647623735e-05, |
| "loss": 0.06680673360824585, |
| "step": 620 |
| }, |
| { |
| "epoch": 4.469314079422382, |
| "grad_norm": 0.40275996923446655, |
| "learning_rate": 3.6760617528755684e-05, |
| "loss": 0.06380727887153625, |
| "step": 621 |
| }, |
| { |
| "epoch": 4.4765342960288805, |
| "grad_norm": 0.33545786142349243, |
| "learning_rate": 3.6287451549557016e-05, |
| "loss": 0.06220482289791107, |
| "step": 622 |
| }, |
| { |
| "epoch": 4.483754512635379, |
| "grad_norm": 0.358530193567276, |
| "learning_rate": 3.5817046518400385e-05, |
| "loss": 0.07663938403129578, |
| "step": 623 |
| }, |
| { |
| "epoch": 4.490974729241877, |
| "grad_norm": 0.4760512411594391, |
| "learning_rate": 3.534941036848258e-05, |
| "loss": 0.07023464143276215, |
| "step": 624 |
| }, |
| { |
| "epoch": 4.498194945848375, |
| "grad_norm": 0.5976642966270447, |
| "learning_rate": 3.4884550986304074e-05, |
| "loss": 0.07387922704219818, |
| "step": 625 |
| }, |
| { |
| "epoch": 4.505415162454874, |
| "grad_norm": 0.4269956052303314, |
| "learning_rate": 3.442247621153643e-05, |
| "loss": 0.06992574781179428, |
| "step": 626 |
| }, |
| { |
| "epoch": 4.512635379061372, |
| "grad_norm": 0.5576417446136475, |
| "learning_rate": 3.3963193836889905e-05, |
| "loss": 0.06586994230747223, |
| "step": 627 |
| }, |
| { |
| "epoch": 4.51985559566787, |
| "grad_norm": 0.27365460991859436, |
| "learning_rate": 3.3506711607981824e-05, |
| "loss": 0.05236521363258362, |
| "step": 628 |
| }, |
| { |
| "epoch": 4.527075812274369, |
| "grad_norm": 0.32343900203704834, |
| "learning_rate": 3.305303722320636e-05, |
| "loss": 0.0833154246211052, |
| "step": 629 |
| }, |
| { |
| "epoch": 4.534296028880867, |
| "grad_norm": 0.6516956686973572, |
| "learning_rate": 3.260217833360446e-05, |
| "loss": 0.08919483423233032, |
| "step": 630 |
| }, |
| { |
| "epoch": 4.541516245487364, |
| "grad_norm": 0.3491802513599396, |
| "learning_rate": 3.215414254273468e-05, |
| "loss": 0.06530842185020447, |
| "step": 631 |
| }, |
| { |
| "epoch": 4.548736462093863, |
| "grad_norm": 0.4860411286354065, |
| "learning_rate": 3.170893740654533e-05, |
| "loss": 0.08303852379322052, |
| "step": 632 |
| }, |
| { |
| "epoch": 4.555956678700361, |
| "grad_norm": 0.4222336709499359, |
| "learning_rate": 3.126657043324677e-05, |
| "loss": 0.0689733549952507, |
| "step": 633 |
| }, |
| { |
| "epoch": 4.563176895306859, |
| "grad_norm": 0.6741357445716858, |
| "learning_rate": 3.082704908318474e-05, |
| "loss": 0.07350336015224457, |
| "step": 634 |
| }, |
| { |
| "epoch": 4.5703971119133575, |
| "grad_norm": 0.3562377691268921, |
| "learning_rate": 3.039038076871481e-05, |
| "loss": 0.06916918605566025, |
| "step": 635 |
| }, |
| { |
| "epoch": 4.577617328519856, |
| "grad_norm": 0.5075894594192505, |
| "learning_rate": 2.9956572854077202e-05, |
| "loss": 0.05884088948369026, |
| "step": 636 |
| }, |
| { |
| "epoch": 4.584837545126354, |
| "grad_norm": 0.49467241764068604, |
| "learning_rate": 2.9525632655272482e-05, |
| "loss": 0.05398339778184891, |
| "step": 637 |
| }, |
| { |
| "epoch": 4.5920577617328515, |
| "grad_norm": 0.30880579352378845, |
| "learning_rate": 2.9097567439938478e-05, |
| "loss": 0.06401652097702026, |
| "step": 638 |
| }, |
| { |
| "epoch": 4.59927797833935, |
| "grad_norm": 0.3337753713130951, |
| "learning_rate": 2.8672384427227484e-05, |
| "loss": 0.06975904852151871, |
| "step": 639 |
| }, |
| { |
| "epoch": 4.606498194945848, |
| "grad_norm": 1.321722149848938, |
| "learning_rate": 2.8250090787684437e-05, |
| "loss": 0.07469654083251953, |
| "step": 640 |
| }, |
| { |
| "epoch": 4.613718411552346, |
| "grad_norm": 0.3456924855709076, |
| "learning_rate": 2.783069364312647e-05, |
| "loss": 0.061028316617012024, |
| "step": 641 |
| }, |
| { |
| "epoch": 4.620938628158845, |
| "grad_norm": 0.9587940573692322, |
| "learning_rate": 2.7414200066522067e-05, |
| "loss": 0.08222727477550507, |
| "step": 642 |
| }, |
| { |
| "epoch": 4.628158844765343, |
| "grad_norm": 0.34773939847946167, |
| "learning_rate": 2.7000617081872402e-05, |
| "loss": 0.06201966106891632, |
| "step": 643 |
| }, |
| { |
| "epoch": 4.635379061371841, |
| "grad_norm": 0.4925590455532074, |
| "learning_rate": 2.6589951664092706e-05, |
| "loss": 0.06247459352016449, |
| "step": 644 |
| }, |
| { |
| "epoch": 4.64259927797834, |
| "grad_norm": 0.34186214208602905, |
| "learning_rate": 2.618221073889433e-05, |
| "loss": 0.054603368043899536, |
| "step": 645 |
| }, |
| { |
| "epoch": 4.649819494584838, |
| "grad_norm": 0.3584718406200409, |
| "learning_rate": 2.5777401182668446e-05, |
| "loss": 0.05636471137404442, |
| "step": 646 |
| }, |
| { |
| "epoch": 4.657039711191336, |
| "grad_norm": 0.33370015025138855, |
| "learning_rate": 2.5375529822369815e-05, |
| "loss": 0.06488262116909027, |
| "step": 647 |
| }, |
| { |
| "epoch": 4.664259927797834, |
| "grad_norm": 0.428996205329895, |
| "learning_rate": 2.497660343540147e-05, |
| "loss": 0.08126433193683624, |
| "step": 648 |
| }, |
| { |
| "epoch": 4.671480144404332, |
| "grad_norm": 0.306087464094162, |
| "learning_rate": 2.4580628749500844e-05, |
| "loss": 0.056748297065496445, |
| "step": 649 |
| }, |
| { |
| "epoch": 4.67870036101083, |
| "grad_norm": 0.45556285977363586, |
| "learning_rate": 2.4187612442626016e-05, |
| "loss": 0.08052687346935272, |
| "step": 650 |
| }, |
| { |
| "epoch": 4.6859205776173285, |
| "grad_norm": 0.24860779941082, |
| "learning_rate": 2.3797561142843105e-05, |
| "loss": 0.06610046327114105, |
| "step": 651 |
| }, |
| { |
| "epoch": 4.693140794223827, |
| "grad_norm": 0.452534556388855, |
| "learning_rate": 2.3410481428214603e-05, |
| "loss": 0.056214869022369385, |
| "step": 652 |
| }, |
| { |
| "epoch": 4.700361010830325, |
| "grad_norm": 0.38668516278266907, |
| "learning_rate": 2.302637982668843e-05, |
| "loss": 0.06206169351935387, |
| "step": 653 |
| }, |
| { |
| "epoch": 4.707581227436823, |
| "grad_norm": 0.2503337562084198, |
| "learning_rate": 2.2645262815987622e-05, |
| "loss": 0.06331133097410202, |
| "step": 654 |
| }, |
| { |
| "epoch": 4.714801444043322, |
| "grad_norm": 0.40086954832077026, |
| "learning_rate": 2.2267136823501455e-05, |
| "loss": 0.06614114344120026, |
| "step": 655 |
| }, |
| { |
| "epoch": 4.722021660649819, |
| "grad_norm": 0.3914971351623535, |
| "learning_rate": 2.1892008226176762e-05, |
| "loss": 0.06879283487796783, |
| "step": 656 |
| }, |
| { |
| "epoch": 4.729241877256317, |
| "grad_norm": 0.3335961699485779, |
| "learning_rate": 2.1519883350410443e-05, |
| "loss": 0.07069812715053558, |
| "step": 657 |
| }, |
| { |
| "epoch": 4.736462093862816, |
| "grad_norm": 0.45657894015312195, |
| "learning_rate": 2.115076847194293e-05, |
| "loss": 0.07669930905103683, |
| "step": 658 |
| }, |
| { |
| "epoch": 4.743682310469314, |
| "grad_norm": 0.28149735927581787, |
| "learning_rate": 2.0784669815752044e-05, |
| "loss": 0.049429114907979965, |
| "step": 659 |
| }, |
| { |
| "epoch": 4.750902527075812, |
| "grad_norm": 0.3293699026107788, |
| "learning_rate": 2.0421593555948393e-05, |
| "loss": 0.05612622946500778, |
| "step": 660 |
| }, |
| { |
| "epoch": 4.758122743682311, |
| "grad_norm": 0.4966195821762085, |
| "learning_rate": 2.0061545815670967e-05, |
| "loss": 0.0798168033361435, |
| "step": 661 |
| }, |
| { |
| "epoch": 4.765342960288809, |
| "grad_norm": 0.40849727392196655, |
| "learning_rate": 1.970453266698391e-05, |
| "loss": 0.06668490171432495, |
| "step": 662 |
| }, |
| { |
| "epoch": 4.772563176895307, |
| "grad_norm": 0.37348175048828125, |
| "learning_rate": 1.935056013077423e-05, |
| "loss": 0.06494098156690598, |
| "step": 663 |
| }, |
| { |
| "epoch": 4.7797833935018055, |
| "grad_norm": 0.29984328150749207, |
| "learning_rate": 1.8999634176650205e-05, |
| "loss": 0.051809899508953094, |
| "step": 664 |
| }, |
| { |
| "epoch": 4.787003610108303, |
| "grad_norm": 0.366937518119812, |
| "learning_rate": 1.8651760722840672e-05, |
| "loss": 0.06859136372804642, |
| "step": 665 |
| }, |
| { |
| "epoch": 4.794223826714801, |
| "grad_norm": 0.46067899465560913, |
| "learning_rate": 1.830694563609525e-05, |
| "loss": 0.06466175615787506, |
| "step": 666 |
| }, |
| { |
| "epoch": 4.8014440433212995, |
| "grad_norm": 0.45101597905158997, |
| "learning_rate": 1.7965194731585422e-05, |
| "loss": 0.062113627791404724, |
| "step": 667 |
| }, |
| { |
| "epoch": 4.808664259927798, |
| "grad_norm": 0.39610734581947327, |
| "learning_rate": 1.762651377280633e-05, |
| "loss": 0.06937667727470398, |
| "step": 668 |
| }, |
| { |
| "epoch": 4.815884476534296, |
| "grad_norm": 0.406488835811615, |
| "learning_rate": 1.7290908471479805e-05, |
| "loss": 0.06851080060005188, |
| "step": 669 |
| }, |
| { |
| "epoch": 4.823104693140794, |
| "grad_norm": 0.4525218904018402, |
| "learning_rate": 1.6958384487457923e-05, |
| "loss": 0.05941899120807648, |
| "step": 670 |
| }, |
| { |
| "epoch": 4.830324909747293, |
| "grad_norm": 0.3088747560977936, |
| "learning_rate": 1.6628947428627438e-05, |
| "loss": 0.05612848699092865, |
| "step": 671 |
| }, |
| { |
| "epoch": 4.837545126353791, |
| "grad_norm": 0.330507755279541, |
| "learning_rate": 1.6302602850815397e-05, |
| "loss": 0.06643390655517578, |
| "step": 672 |
| }, |
| { |
| "epoch": 4.844765342960288, |
| "grad_norm": 0.3919360041618347, |
| "learning_rate": 1.59793562576954e-05, |
| "loss": 0.06949331611394882, |
| "step": 673 |
| }, |
| { |
| "epoch": 4.851985559566787, |
| "grad_norm": 0.43942415714263916, |
| "learning_rate": 1.5659213100694626e-05, |
| "loss": 0.06495010107755661, |
| "step": 674 |
| }, |
| { |
| "epoch": 4.859205776173285, |
| "grad_norm": 0.3314034640789032, |
| "learning_rate": 1.5342178778902116e-05, |
| "loss": 0.0641111359000206, |
| "step": 675 |
| }, |
| { |
| "epoch": 4.866425992779783, |
| "grad_norm": 0.6920318007469177, |
| "learning_rate": 1.5028258638977677e-05, |
| "loss": 0.08164434880018234, |
| "step": 676 |
| }, |
| { |
| "epoch": 4.873646209386282, |
| "grad_norm": 0.4740530550479889, |
| "learning_rate": 1.4717457975061455e-05, |
| "loss": 0.07530295848846436, |
| "step": 677 |
| }, |
| { |
| "epoch": 4.88086642599278, |
| "grad_norm": 0.4190375506877899, |
| "learning_rate": 1.4409782028685104e-05, |
| "loss": 0.07053963840007782, |
| "step": 678 |
| }, |
| { |
| "epoch": 4.888086642599278, |
| "grad_norm": 0.3520694375038147, |
| "learning_rate": 1.4105235988682896e-05, |
| "loss": 0.06393534690141678, |
| "step": 679 |
| }, |
| { |
| "epoch": 4.8953068592057765, |
| "grad_norm": 0.40102067589759827, |
| "learning_rate": 1.3803824991104663e-05, |
| "loss": 0.07699807733297348, |
| "step": 680 |
| }, |
| { |
| "epoch": 4.902527075812275, |
| "grad_norm": 0.4686407148838043, |
| "learning_rate": 1.350555411912886e-05, |
| "loss": 0.06640764325857162, |
| "step": 681 |
| }, |
| { |
| "epoch": 4.909747292418773, |
| "grad_norm": 0.31581881642341614, |
| "learning_rate": 1.3210428402976971e-05, |
| "loss": 0.06866060942411423, |
| "step": 682 |
| }, |
| { |
| "epoch": 4.9169675090252705, |
| "grad_norm": 0.39863917231559753, |
| "learning_rate": 1.2918452819828663e-05, |
| "loss": 0.05927126109600067, |
| "step": 683 |
| }, |
| { |
| "epoch": 4.924187725631769, |
| "grad_norm": 0.7777624130249023, |
| "learning_rate": 1.2629632293737903e-05, |
| "loss": 0.07142484188079834, |
| "step": 684 |
| }, |
| { |
| "epoch": 4.931407942238267, |
| "grad_norm": 0.3229425251483917, |
| "learning_rate": 1.2343971695549727e-05, |
| "loss": 0.06009211018681526, |
| "step": 685 |
| }, |
| { |
| "epoch": 4.938628158844765, |
| "grad_norm": 0.5577942132949829, |
| "learning_rate": 1.2061475842818338e-05, |
| "loss": 0.08745917677879333, |
| "step": 686 |
| }, |
| { |
| "epoch": 4.945848375451264, |
| "grad_norm": 0.4703651964664459, |
| "learning_rate": 1.1782149499725714e-05, |
| "loss": 0.059495482593774796, |
| "step": 687 |
| }, |
| { |
| "epoch": 4.953068592057762, |
| "grad_norm": 0.4242652356624603, |
| "learning_rate": 1.150599737700122e-05, |
| "loss": 0.05950907990336418, |
| "step": 688 |
| }, |
| { |
| "epoch": 4.96028880866426, |
| "grad_norm": 0.454228013753891, |
| "learning_rate": 1.1233024131842374e-05, |
| "loss": 0.08122330904006958, |
| "step": 689 |
| }, |
| { |
| "epoch": 4.967509025270758, |
| "grad_norm": 0.28975188732147217, |
| "learning_rate": 1.0963234367836106e-05, |
| "loss": 0.05605383217334747, |
| "step": 690 |
| }, |
| { |
| "epoch": 4.974729241877256, |
| "grad_norm": 0.38855794072151184, |
| "learning_rate": 1.0696632634881099e-05, |
| "loss": 0.059195615351200104, |
| "step": 691 |
| }, |
| { |
| "epoch": 4.981949458483754, |
| "grad_norm": 0.46655526757240295, |
| "learning_rate": 1.043322342911126e-05, |
| "loss": 0.07140585780143738, |
| "step": 692 |
| }, |
| { |
| "epoch": 4.9891696750902526, |
| "grad_norm": 0.4152084290981293, |
| "learning_rate": 1.017301119281977e-05, |
| "loss": 0.07906543463468552, |
| "step": 693 |
| }, |
| { |
| "epoch": 4.996389891696751, |
| "grad_norm": 0.39406299591064453, |
| "learning_rate": 9.916000314384044e-06, |
| "loss": 0.0740804374217987, |
| "step": 694 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.4732819199562073, |
| "learning_rate": 9.662195128191997e-06, |
| "loss": 0.06949488818645477, |
| "step": 695 |
| }, |
| { |
| "epoch": 5.007220216606498, |
| "grad_norm": 0.2503197491168976, |
| "learning_rate": 9.411599914568702e-06, |
| "loss": 0.048096612095832825, |
| "step": 696 |
| }, |
| { |
| "epoch": 5.014440433212997, |
| "grad_norm": 0.20594151318073273, |
| "learning_rate": 9.164218899704336e-06, |
| "loss": 0.044573940336704254, |
| "step": 697 |
| }, |
| { |
| "epoch": 5.021660649819495, |
| "grad_norm": 0.3241271674633026, |
| "learning_rate": 8.920056255582943e-06, |
| "loss": 0.05117207020521164, |
| "step": 698 |
| }, |
| { |
| "epoch": 5.028880866425993, |
| "grad_norm": 0.18174150586128235, |
| "learning_rate": 8.679116099911854e-06, |
| "loss": 0.04179506003856659, |
| "step": 699 |
| }, |
| { |
| "epoch": 5.036101083032491, |
| "grad_norm": 0.1984250396490097, |
| "learning_rate": 8.441402496052496e-06, |
| "loss": 0.053596131503582, |
| "step": 700 |
| }, |
| { |
| "epoch": 5.043321299638989, |
| "grad_norm": 0.21945425868034363, |
| "learning_rate": 8.206919452951778e-06, |
| "loss": 0.042779602110385895, |
| "step": 701 |
| }, |
| { |
| "epoch": 5.050541516245487, |
| "grad_norm": 0.260366827249527, |
| "learning_rate": 7.975670925074386e-06, |
| "loss": 0.06215063855051994, |
| "step": 702 |
| }, |
| { |
| "epoch": 5.0577617328519855, |
| "grad_norm": 0.14613628387451172, |
| "learning_rate": 7.747660812336222e-06, |
| "loss": 0.04405611380934715, |
| "step": 703 |
| }, |
| { |
| "epoch": 5.064981949458484, |
| "grad_norm": 0.14823344349861145, |
| "learning_rate": 7.52289296003863e-06, |
| "loss": 0.040258534252643585, |
| "step": 704 |
| }, |
| { |
| "epoch": 5.072202166064982, |
| "grad_norm": 0.11375842988491058, |
| "learning_rate": 7.301371158803383e-06, |
| "loss": 0.037275541573762894, |
| "step": 705 |
| }, |
| { |
| "epoch": 5.07942238267148, |
| "grad_norm": 0.18186606466770172, |
| "learning_rate": 7.083099144508976e-06, |
| "loss": 0.04733014106750488, |
| "step": 706 |
| }, |
| { |
| "epoch": 5.086642599277979, |
| "grad_norm": 0.21977707743644714, |
| "learning_rate": 6.8680805982275e-06, |
| "loss": 0.04784165322780609, |
| "step": 707 |
| }, |
| { |
| "epoch": 5.093862815884476, |
| "grad_norm": 0.2574014663696289, |
| "learning_rate": 6.656319146162515e-06, |
| "loss": 0.05287627875804901, |
| "step": 708 |
| }, |
| { |
| "epoch": 5.101083032490974, |
| "grad_norm": 0.24309682846069336, |
| "learning_rate": 6.447818359588054e-06, |
| "loss": 0.05101453512907028, |
| "step": 709 |
| }, |
| { |
| "epoch": 5.108303249097473, |
| "grad_norm": 0.18539798259735107, |
| "learning_rate": 6.242581754788268e-06, |
| "loss": 0.04852500185370445, |
| "step": 710 |
| }, |
| { |
| "epoch": 5.115523465703971, |
| "grad_norm": 0.2783971130847931, |
| "learning_rate": 6.040612792998123e-06, |
| "loss": 0.04606611281633377, |
| "step": 711 |
| }, |
| { |
| "epoch": 5.122743682310469, |
| "grad_norm": 0.2432968020439148, |
| "learning_rate": 5.841914880345111e-06, |
| "loss": 0.05015704780817032, |
| "step": 712 |
| }, |
| { |
| "epoch": 5.129963898916968, |
| "grad_norm": 0.21053971350193024, |
| "learning_rate": 5.64649136779174e-06, |
| "loss": 0.04552324116230011, |
| "step": 713 |
| }, |
| { |
| "epoch": 5.137184115523466, |
| "grad_norm": 0.2822020947933197, |
| "learning_rate": 5.454345551079043e-06, |
| "loss": 0.04811366647481918, |
| "step": 714 |
| }, |
| { |
| "epoch": 5.144404332129964, |
| "grad_norm": 0.3009110689163208, |
| "learning_rate": 5.265480670671053e-06, |
| "loss": 0.057499293237924576, |
| "step": 715 |
| }, |
| { |
| "epoch": 5.1516245487364625, |
| "grad_norm": 0.2482829988002777, |
| "learning_rate": 5.07989991169997e-06, |
| "loss": 0.04851806163787842, |
| "step": 716 |
| }, |
| { |
| "epoch": 5.15884476534296, |
| "grad_norm": 0.4322669506072998, |
| "learning_rate": 4.8976064039126805e-06, |
| "loss": 0.040396105498075485, |
| "step": 717 |
| }, |
| { |
| "epoch": 5.166064981949458, |
| "grad_norm": 0.22342568635940552, |
| "learning_rate": 4.718603221617834e-06, |
| "loss": 0.047426290810108185, |
| "step": 718 |
| }, |
| { |
| "epoch": 5.1732851985559565, |
| "grad_norm": 0.3421519994735718, |
| "learning_rate": 4.542893383634028e-06, |
| "loss": 0.050219837576150894, |
| "step": 719 |
| }, |
| { |
| "epoch": 5.180505415162455, |
| "grad_norm": 0.2810814678668976, |
| "learning_rate": 4.370479853238863e-06, |
| "loss": 0.039352793246507645, |
| "step": 720 |
| }, |
| { |
| "epoch": 5.187725631768953, |
| "grad_norm": 0.16700661182403564, |
| "learning_rate": 4.201365538119096e-06, |
| "loss": 0.03363039717078209, |
| "step": 721 |
| }, |
| { |
| "epoch": 5.194945848375451, |
| "grad_norm": 0.21759076416492462, |
| "learning_rate": 4.035553290321414e-06, |
| "loss": 0.04147165268659592, |
| "step": 722 |
| }, |
| { |
| "epoch": 5.20216606498195, |
| "grad_norm": 0.13985103368759155, |
| "learning_rate": 3.873045906204453e-06, |
| "loss": 0.043792638927698135, |
| "step": 723 |
| }, |
| { |
| "epoch": 5.209386281588448, |
| "grad_norm": 0.2625335156917572, |
| "learning_rate": 3.7138461263916513e-06, |
| "loss": 0.05631346255540848, |
| "step": 724 |
| }, |
| { |
| "epoch": 5.216606498194946, |
| "grad_norm": 0.16157718002796173, |
| "learning_rate": 3.5579566357249793e-06, |
| "loss": 0.03788129985332489, |
| "step": 725 |
| }, |
| { |
| "epoch": 5.223826714801444, |
| "grad_norm": 0.19516326487064362, |
| "learning_rate": 3.405380063219643e-06, |
| "loss": 0.044141985476017, |
| "step": 726 |
| }, |
| { |
| "epoch": 5.231046931407942, |
| "grad_norm": 0.15679222345352173, |
| "learning_rate": 3.256118982019851e-06, |
| "loss": 0.04181442782282829, |
| "step": 727 |
| }, |
| { |
| "epoch": 5.23826714801444, |
| "grad_norm": 0.17116223275661469, |
| "learning_rate": 3.1101759093552728e-06, |
| "loss": 0.036379870027303696, |
| "step": 728 |
| }, |
| { |
| "epoch": 5.245487364620939, |
| "grad_norm": 0.13156861066818237, |
| "learning_rate": 2.9675533064986936e-06, |
| "loss": 0.035947442054748535, |
| "step": 729 |
| }, |
| { |
| "epoch": 5.252707581227437, |
| "grad_norm": 0.31538212299346924, |
| "learning_rate": 2.828253578724538e-06, |
| "loss": 0.04313802346587181, |
| "step": 730 |
| }, |
| { |
| "epoch": 5.259927797833935, |
| "grad_norm": 0.16643384099006653, |
| "learning_rate": 2.6922790752681004e-06, |
| "loss": 0.03961348533630371, |
| "step": 731 |
| }, |
| { |
| "epoch": 5.2671480144404335, |
| "grad_norm": 0.2200719565153122, |
| "learning_rate": 2.5596320892862013e-06, |
| "loss": 0.051561444997787476, |
| "step": 732 |
| }, |
| { |
| "epoch": 5.274368231046932, |
| "grad_norm": 0.3769620954990387, |
| "learning_rate": 2.4303148578183497e-06, |
| "loss": 0.07501974701881409, |
| "step": 733 |
| }, |
| { |
| "epoch": 5.28158844765343, |
| "grad_norm": 0.20830948650836945, |
| "learning_rate": 2.3043295617489746e-06, |
| "loss": 0.04220421612262726, |
| "step": 734 |
| }, |
| { |
| "epoch": 5.2888086642599275, |
| "grad_norm": 0.19785495102405548, |
| "learning_rate": 2.1816783257708083e-06, |
| "loss": 0.03773656487464905, |
| "step": 735 |
| }, |
| { |
| "epoch": 5.296028880866426, |
| "grad_norm": 0.2532854974269867, |
| "learning_rate": 2.0623632183489396e-06, |
| "loss": 0.04975757375359535, |
| "step": 736 |
| }, |
| { |
| "epoch": 5.303249097472924, |
| "grad_norm": 0.21113696694374084, |
| "learning_rate": 1.9463862516859276e-06, |
| "loss": 0.04151391610503197, |
| "step": 737 |
| }, |
| { |
| "epoch": 5.310469314079422, |
| "grad_norm": 0.41521739959716797, |
| "learning_rate": 1.8337493816879436e-06, |
| "loss": 0.04442136362195015, |
| "step": 738 |
| }, |
| { |
| "epoch": 5.317689530685921, |
| "grad_norm": 0.21844181418418884, |
| "learning_rate": 1.724454507931683e-06, |
| "loss": 0.04176602512598038, |
| "step": 739 |
| }, |
| { |
| "epoch": 5.324909747292419, |
| "grad_norm": 0.17051415145397186, |
| "learning_rate": 1.6185034736324156e-06, |
| "loss": 0.03697787970304489, |
| "step": 740 |
| }, |
| { |
| "epoch": 5.332129963898917, |
| "grad_norm": 0.18966346979141235, |
| "learning_rate": 1.515898065612853e-06, |
| "loss": 0.038028594106435776, |
| "step": 741 |
| }, |
| { |
| "epoch": 5.3393501805054155, |
| "grad_norm": 0.17065943777561188, |
| "learning_rate": 1.4166400142730407e-06, |
| "loss": 0.035803914070129395, |
| "step": 742 |
| }, |
| { |
| "epoch": 5.346570397111913, |
| "grad_norm": 0.31374311447143555, |
| "learning_rate": 1.3207309935611367e-06, |
| "loss": 0.056527022272348404, |
| "step": 743 |
| }, |
| { |
| "epoch": 5.353790613718411, |
| "grad_norm": 0.1854705810546875, |
| "learning_rate": 1.2281726209452781e-06, |
| "loss": 0.043832339346408844, |
| "step": 744 |
| }, |
| { |
| "epoch": 5.3610108303249095, |
| "grad_norm": 0.14780643582344055, |
| "learning_rate": 1.138966457386137e-06, |
| "loss": 0.03484298288822174, |
| "step": 745 |
| }, |
| { |
| "epoch": 5.368231046931408, |
| "grad_norm": 0.20580680668354034, |
| "learning_rate": 1.0531140073107626e-06, |
| "loss": 0.042070530354976654, |
| "step": 746 |
| }, |
| { |
| "epoch": 5.375451263537906, |
| "grad_norm": 0.18943031132221222, |
| "learning_rate": 9.70616718587114e-07, |
| "loss": 0.04158391058444977, |
| "step": 747 |
| }, |
| { |
| "epoch": 5.382671480144404, |
| "grad_norm": 0.1829371601343155, |
| "learning_rate": 8.914759824996788e-07, |
| "loss": 0.03915674239397049, |
| "step": 748 |
| }, |
| { |
| "epoch": 5.389891696750903, |
| "grad_norm": 0.23177319765090942, |
| "learning_rate": 8.15693133725981e-07, |
| "loss": 0.03685947507619858, |
| "step": 749 |
| }, |
| { |
| "epoch": 5.397111913357401, |
| "grad_norm": 0.2238905131816864, |
| "learning_rate": 7.432694503141102e-07, |
| "loss": 0.04124440625309944, |
| "step": 750 |
| }, |
| { |
| "epoch": 5.404332129963899, |
| "grad_norm": 0.19328288733959198, |
| "learning_rate": 6.742061536611166e-07, |
| "loss": 0.04230578616261482, |
| "step": 751 |
| }, |
| { |
| "epoch": 5.411552346570397, |
| "grad_norm": 0.2329723834991455, |
| "learning_rate": 6.085044084924718e-07, |
| "loss": 0.04548173397779465, |
| "step": 752 |
| }, |
| { |
| "epoch": 5.418772563176895, |
| "grad_norm": 0.17935019731521606, |
| "learning_rate": 5.461653228423957e-07, |
| "loss": 0.04122517257928848, |
| "step": 753 |
| }, |
| { |
| "epoch": 5.425992779783393, |
| "grad_norm": 0.19964119791984558, |
| "learning_rate": 4.871899480351605e-07, |
| "loss": 0.03701728954911232, |
| "step": 754 |
| }, |
| { |
| "epoch": 5.433212996389892, |
| "grad_norm": 0.17978627979755402, |
| "learning_rate": 4.315792786673489e-07, |
| "loss": 0.04114942252635956, |
| "step": 755 |
| }, |
| { |
| "epoch": 5.44043321299639, |
| "grad_norm": 0.3241960406303406, |
| "learning_rate": 3.7933425259117914e-07, |
| "loss": 0.0526733361184597, |
| "step": 756 |
| }, |
| { |
| "epoch": 5.447653429602888, |
| "grad_norm": 0.22060273587703705, |
| "learning_rate": 3.3045575089853954e-07, |
| "loss": 0.04859931021928787, |
| "step": 757 |
| }, |
| { |
| "epoch": 5.4548736462093865, |
| "grad_norm": 0.22234350442886353, |
| "learning_rate": 2.849445979062226e-07, |
| "loss": 0.0446445494890213, |
| "step": 758 |
| }, |
| { |
| "epoch": 5.462093862815885, |
| "grad_norm": 0.3016712963581085, |
| "learning_rate": 2.428015611420253e-07, |
| "loss": 0.0628553107380867, |
| "step": 759 |
| }, |
| { |
| "epoch": 5.469314079422382, |
| "grad_norm": 0.21905440092086792, |
| "learning_rate": 2.040273513317148e-07, |
| "loss": 0.04386765509843826, |
| "step": 760 |
| }, |
| { |
| "epoch": 5.4765342960288805, |
| "grad_norm": 0.24936382472515106, |
| "learning_rate": 1.686226223872378e-07, |
| "loss": 0.0364193394780159, |
| "step": 761 |
| }, |
| { |
| "epoch": 5.483754512635379, |
| "grad_norm": 0.21109315752983093, |
| "learning_rate": 1.3658797139541878e-07, |
| "loss": 0.036623530089855194, |
| "step": 762 |
| }, |
| { |
| "epoch": 5.490974729241877, |
| "grad_norm": 0.3846798241138458, |
| "learning_rate": 1.0792393860814543e-07, |
| "loss": 0.039832159876823425, |
| "step": 763 |
| }, |
| { |
| "epoch": 5.498194945848375, |
| "grad_norm": 0.3015185296535492, |
| "learning_rate": 8.263100743310937e-08, |
| "loss": 0.054986074566841125, |
| "step": 764 |
| }, |
| { |
| "epoch": 5.505415162454874, |
| "grad_norm": 0.22440600395202637, |
| "learning_rate": 6.070960442567941e-08, |
| "loss": 0.04008050635457039, |
| "step": 765 |
| }, |
| { |
| "epoch": 5.512635379061372, |
| "grad_norm": 0.2297099381685257, |
| "learning_rate": 4.216009928172948e-08, |
| "loss": 0.05160742253065109, |
| "step": 766 |
| }, |
| { |
| "epoch": 5.51985559566787, |
| "grad_norm": 0.2735305726528168, |
| "learning_rate": 2.698280483142135e-08, |
| "loss": 0.047364577651023865, |
| "step": 767 |
| }, |
| { |
| "epoch": 5.527075812274369, |
| "grad_norm": 0.18135972321033478, |
| "learning_rate": 1.517797703387558e-08, |
| "loss": 0.03480882942676544, |
| "step": 768 |
| }, |
| { |
| "epoch": 5.534296028880867, |
| "grad_norm": 0.28884103894233704, |
| "learning_rate": 6.745814972908271e-09, |
| "loss": 0.049272023141384125, |
| "step": 769 |
| }, |
| { |
| "epoch": 5.541516245487364, |
| "grad_norm": 0.22770103812217712, |
| "learning_rate": 1.6864608536115711e-09, |
| "loss": 0.04704172536730766, |
| "step": 770 |
| }, |
| { |
| "epoch": 5.541516245487364, |
| "eval_loss": 0.7697137594223022, |
| "eval_runtime": 48.9364, |
| "eval_samples_per_second": 2.003, |
| "eval_steps_per_second": 0.511, |
| "step": 770 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 770, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.51023270933007e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|