{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.07313997477931904, "eval_steps": 500, "global_step": 39650, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0018014772113132769, "grad_norm": 0.3361005187034607, "learning_rate": 1.9424460431654675e-06, "loss": 7.14, "step": 10 }, { "epoch": 0.0036029544226265538, "grad_norm": 0.33613964915275574, "learning_rate": 4.100719424460432e-06, "loss": 7.1355, "step": 20 }, { "epoch": 0.005404431633939831, "grad_norm": 0.3325730562210083, "learning_rate": 6.2589928057553964e-06, "loss": 7.1349, "step": 30 }, { "epoch": 0.0072059088452531075, "grad_norm": 0.3301234543323517, "learning_rate": 8.417266187050359e-06, "loss": 7.1359, "step": 40 }, { "epoch": 0.009007386056566384, "grad_norm": 0.3354959487915039, "learning_rate": 1.0575539568345324e-05, "loss": 7.1374, "step": 50 }, { "epoch": 0.010808863267879661, "grad_norm": 0.33391907811164856, "learning_rate": 1.2733812949640289e-05, "loss": 7.1404, "step": 60 }, { "epoch": 0.012610340479192938, "grad_norm": 0.3405277132987976, "learning_rate": 1.4892086330935253e-05, "loss": 7.1393, "step": 70 }, { "epoch": 0.014411817690506215, "grad_norm": 0.3303971588611603, "learning_rate": 1.7050359712230218e-05, "loss": 7.1313, "step": 80 }, { "epoch": 0.016213294901819494, "grad_norm": 0.32211360335350037, "learning_rate": 1.920863309352518e-05, "loss": 7.1296, "step": 90 }, { "epoch": 0.01801477211313277, "grad_norm": 0.3308568298816681, "learning_rate": 2.1366906474820145e-05, "loss": 7.1334, "step": 100 }, { "epoch": 0.019816249324446047, "grad_norm": 0.32452037930488586, "learning_rate": 2.3525179856115108e-05, "loss": 7.1336, "step": 110 }, { "epoch": 0.021617726535759323, "grad_norm": 0.31127870082855225, "learning_rate": 2.5683453237410074e-05, "loss": 7.1286, "step": 120 }, { "epoch": 0.0234192037470726, "grad_norm": 0.3265085518360138, "learning_rate": 2.7841726618705038e-05, "loss": 7.1318, "step": 130 }, { "epoch": 0.025220680958385876, "grad_norm": 0.3214122951030731, "learning_rate": 3e-05, "loss": 7.1301, "step": 140 }, { "epoch": 0.027022158169699155, "grad_norm": 0.33086255192756653, "learning_rate": 3.2158273381294964e-05, "loss": 7.1298, "step": 150 }, { "epoch": 0.02882363538101243, "grad_norm": 0.3205653429031372, "learning_rate": 3.431654676258993e-05, "loss": 7.1304, "step": 160 }, { "epoch": 0.03062511259232571, "grad_norm": 0.31835320591926575, "learning_rate": 3.647482014388489e-05, "loss": 7.1297, "step": 170 }, { "epoch": 0.03242658980363899, "grad_norm": 0.31880906224250793, "learning_rate": 3.863309352517986e-05, "loss": 7.1291, "step": 180 }, { "epoch": 0.03422806701495226, "grad_norm": 0.323652982711792, "learning_rate": 4.0791366906474823e-05, "loss": 7.1269, "step": 190 }, { "epoch": 0.03602954422626554, "grad_norm": 0.3075670003890991, "learning_rate": 4.294964028776978e-05, "loss": 7.1226, "step": 200 }, { "epoch": 0.03783102143757881, "grad_norm": 0.32530224323272705, "learning_rate": 4.510791366906475e-05, "loss": 7.1293, "step": 210 }, { "epoch": 0.039632498648892095, "grad_norm": 0.3238227665424347, "learning_rate": 4.726618705035971e-05, "loss": 7.1339, "step": 220 }, { "epoch": 0.04143397586020537, "grad_norm": 0.31533968448638916, "learning_rate": 4.9424460431654676e-05, "loss": 7.1265, "step": 230 }, { "epoch": 0.043235453071518645, "grad_norm": 0.3249633014202118, "learning_rate": 5.158273381294964e-05, "loss": 7.1317, "step": 240 }, { "epoch": 0.04503693028283192, "grad_norm": 0.3172495365142822, "learning_rate": 5.37410071942446e-05, "loss": 7.1301, "step": 250 }, { "epoch": 0.0468384074941452, "grad_norm": 0.3189067542552948, "learning_rate": 5.589928057553957e-05, "loss": 7.1272, "step": 260 }, { "epoch": 0.04863988470545848, "grad_norm": 0.31838688254356384, "learning_rate": 5.8057553956834536e-05, "loss": 7.1262, "step": 270 }, { "epoch": 0.05044136191677175, "grad_norm": 0.31754276156425476, "learning_rate": 5.9999994675541476e-05, "loss": 7.1319, "step": 280 }, { "epoch": 0.05224283912808503, "grad_norm": 0.32886457443237305, "learning_rate": 5.9999355742804625e-05, "loss": 7.1293, "step": 290 }, { "epoch": 0.05404431633939831, "grad_norm": 0.3191179931163788, "learning_rate": 5.999765194434878e-05, "loss": 7.1282, "step": 300 }, { "epoch": 0.055845793550711585, "grad_norm": 0.3112945258617401, "learning_rate": 5.999488334065245e-05, "loss": 7.1262, "step": 310 }, { "epoch": 0.05764727076202486, "grad_norm": 0.31179070472717285, "learning_rate": 5.9991050029990786e-05, "loss": 7.1235, "step": 320 }, { "epoch": 0.059448747973338135, "grad_norm": 0.30959728360176086, "learning_rate": 5.998615214843208e-05, "loss": 7.1222, "step": 330 }, { "epoch": 0.06125022518465142, "grad_norm": 0.30832692980766296, "learning_rate": 5.998018986983294e-05, "loss": 7.1201, "step": 340 }, { "epoch": 0.06305170239596469, "grad_norm": 0.3159351050853729, "learning_rate": 5.9973163405832115e-05, "loss": 7.1275, "step": 350 }, { "epoch": 0.06485317960727797, "grad_norm": 0.3227883577346802, "learning_rate": 5.996507300584298e-05, "loss": 7.1268, "step": 360 }, { "epoch": 0.06665465681859124, "grad_norm": 0.316466361284256, "learning_rate": 5.9955918957044694e-05, "loss": 7.1286, "step": 370 }, { "epoch": 0.06845613402990453, "grad_norm": 0.31269118189811707, "learning_rate": 5.994570158437199e-05, "loss": 7.1217, "step": 380 }, { "epoch": 0.0702576112412178, "grad_norm": 0.32262393832206726, "learning_rate": 5.993442125050364e-05, "loss": 7.1244, "step": 390 }, { "epoch": 0.07205908845253108, "grad_norm": 0.31211158633232117, "learning_rate": 5.992207835584963e-05, "loss": 7.1204, "step": 400 }, { "epoch": 0.07386056566384436, "grad_norm": 0.3049217164516449, "learning_rate": 5.990867333853686e-05, "loss": 7.1181, "step": 410 }, { "epoch": 0.07566204287515763, "grad_norm": 0.31584808230400085, "learning_rate": 5.989420667439366e-05, "loss": 7.1234, "step": 420 }, { "epoch": 0.07746352008647091, "grad_norm": 0.31106922030448914, "learning_rate": 5.987867887693291e-05, "loss": 7.1214, "step": 430 }, { "epoch": 0.07926499729778419, "grad_norm": 0.32401391863822937, "learning_rate": 5.986209049733373e-05, "loss": 7.1236, "step": 440 }, { "epoch": 0.08106647450909746, "grad_norm": 0.31644102931022644, "learning_rate": 5.9844442124422e-05, "loss": 7.1305, "step": 450 }, { "epoch": 0.08286795172041074, "grad_norm": 0.30985546112060547, "learning_rate": 5.98257343846494e-05, "loss": 7.124, "step": 460 }, { "epoch": 0.08466942893172401, "grad_norm": 0.3285570442676544, "learning_rate": 5.980596794207124e-05, "loss": 7.1296, "step": 470 }, { "epoch": 0.08647090614303729, "grad_norm": 0.3286840617656708, "learning_rate": 5.978514349832281e-05, "loss": 7.1304, "step": 480 }, { "epoch": 0.08827238335435057, "grad_norm": 0.3256056606769562, "learning_rate": 5.976326179259453e-05, "loss": 7.131, "step": 490 }, { "epoch": 0.09007386056566384, "grad_norm": 0.3061903715133667, "learning_rate": 5.974032360160569e-05, "loss": 7.1274, "step": 500 }, { "epoch": 0.09187533777697712, "grad_norm": 0.30826541781425476, "learning_rate": 5.971632973957688e-05, "loss": 7.1229, "step": 510 }, { "epoch": 0.0936768149882904, "grad_norm": 0.3031136095523834, "learning_rate": 5.96912810582011e-05, "loss": 7.1235, "step": 520 }, { "epoch": 0.09547829219960367, "grad_norm": 0.31557247042655945, "learning_rate": 5.9665178446613516e-05, "loss": 7.126, "step": 530 }, { "epoch": 0.09727976941091696, "grad_norm": 0.30393531918525696, "learning_rate": 5.96380228313599e-05, "loss": 7.1207, "step": 540 }, { "epoch": 0.09908124662223022, "grad_norm": 0.3191448748111725, "learning_rate": 5.960981517636374e-05, "loss": 7.1271, "step": 550 }, { "epoch": 0.1008827238335435, "grad_norm": 0.30414143204689026, "learning_rate": 5.9580556482892036e-05, "loss": 7.1265, "step": 560 }, { "epoch": 0.10268420104485679, "grad_norm": 0.3122613728046417, "learning_rate": 5.955024778951974e-05, "loss": 7.1262, "step": 570 }, { "epoch": 0.10448567825617006, "grad_norm": 0.30681541562080383, "learning_rate": 5.951889017209292e-05, "loss": 7.1288, "step": 580 }, { "epoch": 0.10628715546748334, "grad_norm": 0.3067385256290436, "learning_rate": 5.948648474369054e-05, "loss": 7.1267, "step": 590 }, { "epoch": 0.10808863267879662, "grad_norm": 0.3071127235889435, "learning_rate": 5.945303265458494e-05, "loss": 7.1224, "step": 600 }, { "epoch": 0.10989010989010989, "grad_norm": 0.31324437260627747, "learning_rate": 5.9418535092201084e-05, "loss": 7.1151, "step": 610 }, { "epoch": 0.11169158710142317, "grad_norm": 0.30634191632270813, "learning_rate": 5.938299328107431e-05, "loss": 7.1128, "step": 620 }, { "epoch": 0.11349306431273644, "grad_norm": 0.30638769268989563, "learning_rate": 5.934640848280691e-05, "loss": 7.1101, "step": 630 }, { "epoch": 0.11529454152404972, "grad_norm": 0.28490421175956726, "learning_rate": 5.930878199602335e-05, "loss": 7.1129, "step": 640 }, { "epoch": 0.117096018735363, "grad_norm": 0.297146201133728, "learning_rate": 5.92701151563242e-05, "loss": 7.1143, "step": 650 }, { "epoch": 0.11889749594667627, "grad_norm": 0.29625293612480164, "learning_rate": 5.923040933623866e-05, "loss": 7.1125, "step": 660 }, { "epoch": 0.12069897315798955, "grad_norm": 0.29476043581962585, "learning_rate": 5.9189665945175874e-05, "loss": 7.1105, "step": 670 }, { "epoch": 0.12250045036930284, "grad_norm": 0.30027854442596436, "learning_rate": 5.9147886429374924e-05, "loss": 7.1133, "step": 680 }, { "epoch": 0.1243019275806161, "grad_norm": 0.30565157532691956, "learning_rate": 5.910507227185348e-05, "loss": 7.1197, "step": 690 }, { "epoch": 0.12610340479192939, "grad_norm": 0.30098357796669006, "learning_rate": 5.90612249923551e-05, "loss": 7.114, "step": 700 }, { "epoch": 0.12790488200324265, "grad_norm": 0.3063106834888458, "learning_rate": 5.901634614729539e-05, "loss": 7.1188, "step": 710 }, { "epoch": 0.12970635921455595, "grad_norm": 0.2957518398761749, "learning_rate": 5.897043732970667e-05, "loss": 7.1126, "step": 720 }, { "epoch": 0.13150783642586922, "grad_norm": 0.30434298515319824, "learning_rate": 5.892350016918149e-05, "loss": 7.1092, "step": 730 }, { "epoch": 0.13330931363718249, "grad_norm": 0.29086801409721375, "learning_rate": 5.887553633181471e-05, "loss": 7.1056, "step": 740 }, { "epoch": 0.13511079084849575, "grad_norm": 0.30126407742500305, "learning_rate": 5.8826547520144446e-05, "loss": 7.1067, "step": 750 }, { "epoch": 0.13691226805980905, "grad_norm": 0.3037166893482208, "learning_rate": 5.877653547309159e-05, "loss": 7.1131, "step": 760 }, { "epoch": 0.13871374527112232, "grad_norm": 0.31079939007759094, "learning_rate": 5.8725501965898065e-05, "loss": 7.1169, "step": 770 }, { "epoch": 0.1405152224824356, "grad_norm": 0.288516640663147, "learning_rate": 5.8673448810063846e-05, "loss": 7.1087, "step": 780 }, { "epoch": 0.14231669969374888, "grad_norm": 0.29432857036590576, "learning_rate": 5.862037785328267e-05, "loss": 7.1139, "step": 790 }, { "epoch": 0.14411817690506215, "grad_norm": 0.314834862947464, "learning_rate": 5.8566290979376405e-05, "loss": 7.1183, "step": 800 }, { "epoch": 0.14591965411637542, "grad_norm": 0.3082789480686188, "learning_rate": 5.851119010822821e-05, "loss": 7.1192, "step": 810 }, { "epoch": 0.14772113132768872, "grad_norm": 0.28503844141960144, "learning_rate": 5.8455077195714375e-05, "loss": 7.112, "step": 820 }, { "epoch": 0.14952260853900198, "grad_norm": 0.3095056116580963, "learning_rate": 5.839795423363491e-05, "loss": 7.1151, "step": 830 }, { "epoch": 0.15132408575031525, "grad_norm": 0.3043094873428345, "learning_rate": 5.833982324964282e-05, "loss": 7.1128, "step": 840 }, { "epoch": 0.15312556296162855, "grad_norm": 0.2926342487335205, "learning_rate": 5.828068630717216e-05, "loss": 7.1108, "step": 850 }, { "epoch": 0.15492704017294182, "grad_norm": 0.3017706871032715, "learning_rate": 5.8220545505364785e-05, "loss": 7.111, "step": 860 }, { "epoch": 0.15672851738425508, "grad_norm": 0.2948770821094513, "learning_rate": 5.815940297899579e-05, "loss": 7.1113, "step": 870 }, { "epoch": 0.15852999459556838, "grad_norm": 0.2928009331226349, "learning_rate": 5.80972608983978e-05, "loss": 7.1146, "step": 880 }, { "epoch": 0.16033147180688165, "grad_norm": 0.298864483833313, "learning_rate": 5.803412146938389e-05, "loss": 7.1151, "step": 890 }, { "epoch": 0.16213294901819492, "grad_norm": 0.2933467626571655, "learning_rate": 5.7969986933169324e-05, "loss": 7.1113, "step": 900 }, { "epoch": 0.16393442622950818, "grad_norm": 0.2906360924243927, "learning_rate": 5.790485956629194e-05, "loss": 7.1131, "step": 910 }, { "epoch": 0.16573590344082148, "grad_norm": 0.29419684410095215, "learning_rate": 5.78387416805314e-05, "loss": 7.1127, "step": 920 }, { "epoch": 0.16753738065213475, "grad_norm": 0.302638977766037, "learning_rate": 5.777163562282712e-05, "loss": 7.1136, "step": 930 }, { "epoch": 0.16933885786344802, "grad_norm": 0.3013106882572174, "learning_rate": 5.770354377519491e-05, "loss": 7.1162, "step": 940 }, { "epoch": 0.1711403350747613, "grad_norm": 0.29050013422966003, "learning_rate": 5.76344685546425e-05, "loss": 7.1107, "step": 950 }, { "epoch": 0.17294181228607458, "grad_norm": 0.2983992397785187, "learning_rate": 5.7564412413083684e-05, "loss": 7.1098, "step": 960 }, { "epoch": 0.17474328949738785, "grad_norm": 0.30069518089294434, "learning_rate": 5.749337783725133e-05, "loss": 7.1119, "step": 970 }, { "epoch": 0.17654476670870115, "grad_norm": 0.28475502133369446, "learning_rate": 5.742136734860905e-05, "loss": 7.11, "step": 980 }, { "epoch": 0.1783462439200144, "grad_norm": 0.2936151325702667, "learning_rate": 5.73483835032618e-05, "loss": 7.1116, "step": 990 }, { "epoch": 0.18014772113132768, "grad_norm": 0.29760053753852844, "learning_rate": 5.727442889186502e-05, "loss": 7.113, "step": 1000 }, { "epoch": 0.18194919834264098, "grad_norm": 0.29304802417755127, "learning_rate": 5.7199506139532764e-05, "loss": 7.1138, "step": 1010 }, { "epoch": 0.18375067555395425, "grad_norm": 0.2970970869064331, "learning_rate": 5.712361790574451e-05, "loss": 7.1175, "step": 1020 }, { "epoch": 0.18555215276526751, "grad_norm": 0.30437323451042175, "learning_rate": 5.7046766884250726e-05, "loss": 7.113, "step": 1030 }, { "epoch": 0.1873536299765808, "grad_norm": 0.29815158247947693, "learning_rate": 5.696895580297726e-05, "loss": 7.1123, "step": 1040 }, { "epoch": 0.18915510718789408, "grad_norm": 0.29573309421539307, "learning_rate": 5.6890187423928535e-05, "loss": 7.1136, "step": 1050 }, { "epoch": 0.19095658439920735, "grad_norm": 0.28850027918815613, "learning_rate": 5.681046454308948e-05, "loss": 7.1112, "step": 1060 }, { "epoch": 0.19275806161052061, "grad_norm": 0.29388895630836487, "learning_rate": 5.672978999032631e-05, "loss": 7.1147, "step": 1070 }, { "epoch": 0.1945595388218339, "grad_norm": 0.2940690815448761, "learning_rate": 5.664816662928601e-05, "loss": 7.1143, "step": 1080 }, { "epoch": 0.19636101603314718, "grad_norm": 0.29269036650657654, "learning_rate": 5.65655973572948e-05, "loss": 7.11, "step": 1090 }, { "epoch": 0.19816249324446045, "grad_norm": 0.30264806747436523, "learning_rate": 5.648208510525517e-05, "loss": 7.1146, "step": 1100 }, { "epoch": 0.19996397045577374, "grad_norm": 0.29382726550102234, "learning_rate": 5.639763283754195e-05, "loss": 7.1092, "step": 1110 }, { "epoch": 0.201765447667087, "grad_norm": 0.2934025526046753, "learning_rate": 5.6312243551896985e-05, "loss": 7.1072, "step": 1120 }, { "epoch": 0.20356692487840028, "grad_norm": 0.28128302097320557, "learning_rate": 5.6225920279322824e-05, "loss": 7.1029, "step": 1130 }, { "epoch": 0.20536840208971358, "grad_norm": 0.2939789593219757, "learning_rate": 5.613866608397504e-05, "loss": 7.1091, "step": 1140 }, { "epoch": 0.20716987930102684, "grad_norm": 0.26693952083587646, "learning_rate": 5.6050484063053544e-05, "loss": 7.1054, "step": 1150 }, { "epoch": 0.2089713565123401, "grad_norm": 0.2876898944377899, "learning_rate": 5.5961377346692584e-05, "loss": 7.0949, "step": 1160 }, { "epoch": 0.2107728337236534, "grad_norm": 0.2696641981601715, "learning_rate": 5.5871349097849675e-05, "loss": 7.0991, "step": 1170 }, { "epoch": 0.21257431093496668, "grad_norm": 0.27382394671440125, "learning_rate": 5.578040251219332e-05, "loss": 7.0976, "step": 1180 }, { "epoch": 0.21437578814627994, "grad_norm": 0.29006198048591614, "learning_rate": 5.568854081798954e-05, "loss": 7.0967, "step": 1190 }, { "epoch": 0.21617726535759324, "grad_norm": 0.2822149097919464, "learning_rate": 5.559576727598733e-05, "loss": 7.1005, "step": 1200 }, { "epoch": 0.2179787425689065, "grad_norm": 0.2781624495983124, "learning_rate": 5.5502085179302906e-05, "loss": 7.1046, "step": 1210 }, { "epoch": 0.21978021978021978, "grad_norm": 0.2801850140094757, "learning_rate": 5.540749785330279e-05, "loss": 7.1039, "step": 1220 }, { "epoch": 0.22158169699153304, "grad_norm": 0.292462021112442, "learning_rate": 5.531200865548577e-05, "loss": 7.1052, "step": 1230 }, { "epoch": 0.22338317420284634, "grad_norm": 0.2921360433101654, "learning_rate": 5.5215620975363746e-05, "loss": 7.104, "step": 1240 }, { "epoch": 0.2251846514141596, "grad_norm": 0.2885719835758209, "learning_rate": 5.511833823434142e-05, "loss": 7.1056, "step": 1250 }, { "epoch": 0.22698612862547288, "grad_norm": 0.2794191837310791, "learning_rate": 5.502016388559479e-05, "loss": 7.1046, "step": 1260 }, { "epoch": 0.22878760583678617, "grad_norm": 0.28259149193763733, "learning_rate": 5.492110141394866e-05, "loss": 7.1062, "step": 1270 }, { "epoch": 0.23058908304809944, "grad_norm": 0.28836706280708313, "learning_rate": 5.482115433575288e-05, "loss": 7.1069, "step": 1280 }, { "epoch": 0.2323905602594127, "grad_norm": 0.29538246989250183, "learning_rate": 5.472032619875755e-05, "loss": 7.1069, "step": 1290 }, { "epoch": 0.234192037470726, "grad_norm": 0.2921678125858307, "learning_rate": 5.4618620581987075e-05, "loss": 7.1077, "step": 1300 }, { "epoch": 0.23599351468203927, "grad_norm": 0.2910170257091522, "learning_rate": 5.4516041095613143e-05, "loss": 7.1115, "step": 1310 }, { "epoch": 0.23779499189335254, "grad_norm": 0.2894938886165619, "learning_rate": 5.4412591380826565e-05, "loss": 7.1074, "step": 1320 }, { "epoch": 0.23959646910466584, "grad_norm": 0.28597739338874817, "learning_rate": 5.430827510970804e-05, "loss": 7.1076, "step": 1330 }, { "epoch": 0.2413979463159791, "grad_norm": 0.28273728489875793, "learning_rate": 5.4203095985097796e-05, "loss": 7.1082, "step": 1340 }, { "epoch": 0.24319942352729237, "grad_norm": 0.2996627986431122, "learning_rate": 5.4097057740464124e-05, "loss": 7.1083, "step": 1350 }, { "epoch": 0.24500090073860567, "grad_norm": 0.27732813358306885, "learning_rate": 5.3990164139770924e-05, "loss": 7.1072, "step": 1360 }, { "epoch": 0.24680237794991894, "grad_norm": 0.2852475643157959, "learning_rate": 5.388241897734405e-05, "loss": 7.1057, "step": 1370 }, { "epoch": 0.2486038551612322, "grad_norm": 0.2819502651691437, "learning_rate": 5.377382607773662e-05, "loss": 7.1047, "step": 1380 }, { "epoch": 0.2504053323725455, "grad_norm": 0.287288099527359, "learning_rate": 5.3664389295593286e-05, "loss": 7.1038, "step": 1390 }, { "epoch": 0.25220680958385877, "grad_norm": 0.28206947445869446, "learning_rate": 5.3554112515513395e-05, "loss": 7.1075, "step": 1400 }, { "epoch": 0.25400828679517207, "grad_norm": 0.2885054051876068, "learning_rate": 5.344299965191311e-05, "loss": 7.1044, "step": 1410 }, { "epoch": 0.2558097640064853, "grad_norm": 0.29194867610931396, "learning_rate": 5.3331054648886424e-05, "loss": 7.1042, "step": 1420 }, { "epoch": 0.2576112412177986, "grad_norm": 0.28325173258781433, "learning_rate": 5.321828148006523e-05, "loss": 7.1072, "step": 1430 }, { "epoch": 0.2594127184291119, "grad_norm": 0.2817630469799042, "learning_rate": 5.31046841484782e-05, "loss": 7.1119, "step": 1440 }, { "epoch": 0.26121419564042514, "grad_norm": 0.29226502776145935, "learning_rate": 5.299026668640873e-05, "loss": 7.1081, "step": 1450 }, { "epoch": 0.26301567285173844, "grad_norm": 0.29092028737068176, "learning_rate": 5.287503315525182e-05, "loss": 7.1074, "step": 1460 }, { "epoch": 0.2648171500630517, "grad_norm": 0.2762048542499542, "learning_rate": 5.275898764536987e-05, "loss": 7.1051, "step": 1470 }, { "epoch": 0.26661862727436497, "grad_norm": 0.2918614447116852, "learning_rate": 5.264213427594752e-05, "loss": 7.1054, "step": 1480 }, { "epoch": 0.26842010448567827, "grad_norm": 0.2910752594470978, "learning_rate": 5.252447719484543e-05, "loss": 7.1049, "step": 1490 }, { "epoch": 0.2702215816969915, "grad_norm": 0.2944730222225189, "learning_rate": 5.2406020578453e-05, "loss": 7.1031, "step": 1500 }, { "epoch": 0.2720230589083048, "grad_norm": 0.29117706418037415, "learning_rate": 5.2286768631540225e-05, "loss": 7.1098, "step": 1510 }, { "epoch": 0.2738245361196181, "grad_norm": 0.28816863894462585, "learning_rate": 5.216672558710835e-05, "loss": 7.1088, "step": 1520 }, { "epoch": 0.27562601333093134, "grad_norm": 0.2859260141849518, "learning_rate": 5.204589570623962e-05, "loss": 7.1113, "step": 1530 }, { "epoch": 0.27742749054224464, "grad_norm": 0.28866350650787354, "learning_rate": 5.1924283277946096e-05, "loss": 7.113, "step": 1540 }, { "epoch": 0.27922896775355793, "grad_norm": 0.2842634618282318, "learning_rate": 5.180189261901733e-05, "loss": 7.1081, "step": 1550 }, { "epoch": 0.2810304449648712, "grad_norm": 0.28465187549591064, "learning_rate": 5.167872807386721e-05, "loss": 7.1105, "step": 1560 }, { "epoch": 0.28283192217618447, "grad_norm": 0.2890684902667999, "learning_rate": 5.155479401437966e-05, "loss": 7.1094, "step": 1570 }, { "epoch": 0.28463339938749777, "grad_norm": 0.2944437265396118, "learning_rate": 5.1430094839753544e-05, "loss": 7.1114, "step": 1580 }, { "epoch": 0.286434876598811, "grad_norm": 0.2873692810535431, "learning_rate": 5.130463497634646e-05, "loss": 7.112, "step": 1590 }, { "epoch": 0.2882363538101243, "grad_norm": 0.28741228580474854, "learning_rate": 5.117841887751764e-05, "loss": 7.1101, "step": 1600 }, { "epoch": 0.2900378310214376, "grad_norm": 0.29523614048957825, "learning_rate": 5.1051451023469857e-05, "loss": 7.1113, "step": 1610 }, { "epoch": 0.29183930823275084, "grad_norm": 0.2787465751171112, "learning_rate": 5.09237359210904e-05, "loss": 7.1085, "step": 1620 }, { "epoch": 0.29364078544406413, "grad_norm": 0.2841334342956543, "learning_rate": 5.079527810379109e-05, "loss": 7.1056, "step": 1630 }, { "epoch": 0.29544226265537743, "grad_norm": 0.2873667776584625, "learning_rate": 5.0666082131347404e-05, "loss": 7.1085, "step": 1640 }, { "epoch": 0.29724373986669067, "grad_norm": 0.2831195592880249, "learning_rate": 5.053615258973655e-05, "loss": 7.1104, "step": 1650 }, { "epoch": 0.29904521707800397, "grad_norm": 0.2801186740398407, "learning_rate": 5.0405494090974734e-05, "loss": 7.1053, "step": 1660 }, { "epoch": 0.30084669428931726, "grad_norm": 0.27779996395111084, "learning_rate": 5.027411127295343e-05, "loss": 7.1042, "step": 1670 }, { "epoch": 0.3026481715006305, "grad_norm": 0.28383657336235046, "learning_rate": 5.0142008799274754e-05, "loss": 7.1028, "step": 1680 }, { "epoch": 0.3044496487119438, "grad_norm": 0.27557089924812317, "learning_rate": 5.000919135908594e-05, "loss": 7.1048, "step": 1690 }, { "epoch": 0.3062511259232571, "grad_norm": 0.2826816737651825, "learning_rate": 4.987566366691285e-05, "loss": 7.1029, "step": 1700 }, { "epoch": 0.30805260313457034, "grad_norm": 0.28983259201049805, "learning_rate": 4.974143046249267e-05, "loss": 7.1043, "step": 1710 }, { "epoch": 0.30985408034588363, "grad_norm": 0.280671089887619, "learning_rate": 4.9606496510605646e-05, "loss": 7.1076, "step": 1720 }, { "epoch": 0.3116555575571969, "grad_norm": 0.2826908826828003, "learning_rate": 4.947086660090597e-05, "loss": 7.1053, "step": 1730 }, { "epoch": 0.31345703476851017, "grad_norm": 0.2816767692565918, "learning_rate": 4.9334545547751734e-05, "loss": 7.1095, "step": 1740 }, { "epoch": 0.31525851197982346, "grad_norm": 0.2781662046909332, "learning_rate": 4.9197538190034086e-05, "loss": 7.1096, "step": 1750 }, { "epoch": 0.31705998919113676, "grad_norm": 0.2725811004638672, "learning_rate": 4.905984939100538e-05, "loss": 7.1061, "step": 1760 }, { "epoch": 0.31886146640245, "grad_norm": 0.28421854972839355, "learning_rate": 4.8921484038106656e-05, "loss": 7.1094, "step": 1770 }, { "epoch": 0.3206629436137633, "grad_norm": 0.28778699040412903, "learning_rate": 4.87824470427941e-05, "loss": 7.1138, "step": 1780 }, { "epoch": 0.32246442082507654, "grad_norm": 0.2935123145580292, "learning_rate": 4.864274334036469e-05, "loss": 7.1086, "step": 1790 }, { "epoch": 0.32426589803638983, "grad_norm": 0.28005754947662354, "learning_rate": 4.850237788978102e-05, "loss": 7.1071, "step": 1800 }, { "epoch": 0.32606737524770313, "grad_norm": 0.2749859094619751, "learning_rate": 4.8361355673495335e-05, "loss": 7.1069, "step": 1810 }, { "epoch": 0.32786885245901637, "grad_norm": 0.2896328866481781, "learning_rate": 4.821968169727258e-05, "loss": 7.1078, "step": 1820 }, { "epoch": 0.32967032967032966, "grad_norm": 0.2806931436061859, "learning_rate": 4.8077360990012765e-05, "loss": 7.1077, "step": 1830 }, { "epoch": 0.33147180688164296, "grad_norm": 0.2747594118118286, "learning_rate": 4.7934398603572464e-05, "loss": 7.104, "step": 1840 }, { "epoch": 0.3332732840929562, "grad_norm": 0.265837162733078, "learning_rate": 4.779079961258546e-05, "loss": 7.1024, "step": 1850 }, { "epoch": 0.3350747613042695, "grad_norm": 0.2891830503940582, "learning_rate": 4.764656911428266e-05, "loss": 7.1068, "step": 1860 }, { "epoch": 0.3368762385155828, "grad_norm": 0.2813827097415924, "learning_rate": 4.750171222831111e-05, "loss": 7.1029, "step": 1870 }, { "epoch": 0.33867771572689603, "grad_norm": 0.2912938892841339, "learning_rate": 4.735623409655229e-05, "loss": 7.1064, "step": 1880 }, { "epoch": 0.34047919293820933, "grad_norm": 0.27962204813957214, "learning_rate": 4.721013988293962e-05, "loss": 7.104, "step": 1890 }, { "epoch": 0.3422806701495226, "grad_norm": 0.2841653525829315, "learning_rate": 4.7063434773275135e-05, "loss": 7.106, "step": 1900 }, { "epoch": 0.0018014772113132769, "grad_norm": 0.3077791929244995, "learning_rate": 4.691612397504539e-05, "loss": 7.1339, "step": 1910 }, { "epoch": 0.0036029544226265538, "grad_norm": 0.2980128526687622, "learning_rate": 4.6768212717236644e-05, "loss": 7.121, "step": 1920 }, { "epoch": 0.005404431633939831, "grad_norm": 0.28817498683929443, "learning_rate": 4.661970625014926e-05, "loss": 7.1134, "step": 1930 }, { "epoch": 0.0072059088452531075, "grad_norm": 0.29074928164482117, "learning_rate": 4.6470609845211304e-05, "loss": 7.1133, "step": 1940 }, { "epoch": 0.009007386056566384, "grad_norm": 0.2961477041244507, "learning_rate": 4.632092879479143e-05, "loss": 7.1158, "step": 1950 }, { "epoch": 0.010808863267879661, "grad_norm": 0.29236099123954773, "learning_rate": 4.6170668412011056e-05, "loss": 7.1085, "step": 1960 }, { "epoch": 0.012610340479192938, "grad_norm": 0.2707636058330536, "learning_rate": 4.601983403055576e-05, "loss": 7.1021, "step": 1970 }, { "epoch": 0.014411817690506215, "grad_norm": 0.27785351872444153, "learning_rate": 4.58684310044859e-05, "loss": 7.1012, "step": 1980 }, { "epoch": 0.016213294901819494, "grad_norm": 0.2802380621433258, "learning_rate": 4.571646470804665e-05, "loss": 7.1019, "step": 1990 }, { "epoch": 0.01801477211313277, "grad_norm": 0.27940770983695984, "learning_rate": 4.5563940535477155e-05, "loss": 7.1015, "step": 2000 }, { "epoch": 0.019816249324446047, "grad_norm": 0.2777670919895172, "learning_rate": 4.541086390081911e-05, "loss": 7.1022, "step": 2010 }, { "epoch": 0.021617726535759323, "grad_norm": 0.2813062369823456, "learning_rate": 4.525724023772456e-05, "loss": 7.0977, "step": 2020 }, { "epoch": 0.0234192037470726, "grad_norm": 0.28203415870666504, "learning_rate": 4.510307499926303e-05, "loss": 7.0989, "step": 2030 }, { "epoch": 0.025220680958385876, "grad_norm": 0.2658560276031494, "learning_rate": 4.494837365772794e-05, "loss": 7.099, "step": 2040 }, { "epoch": 0.027022158169699155, "grad_norm": 0.29370710253715515, "learning_rate": 4.479314170444242e-05, "loss": 7.1012, "step": 2050 }, { "epoch": 0.02882363538101243, "grad_norm": 0.271969199180603, "learning_rate": 4.4637384649564295e-05, "loss": 7.1059, "step": 2060 }, { "epoch": 0.03062511259232571, "grad_norm": 0.2734180688858032, "learning_rate": 4.4481108021890576e-05, "loss": 7.1026, "step": 2070 }, { "epoch": 0.03242658980363899, "grad_norm": 0.29048392176628113, "learning_rate": 4.4324317368661195e-05, "loss": 7.1012, "step": 2080 }, { "epoch": 0.03422806701495226, "grad_norm": 0.2677623927593231, "learning_rate": 4.4167018255362045e-05, "loss": 7.1012, "step": 2090 }, { "epoch": 0.03602954422626554, "grad_norm": 0.2717573344707489, "learning_rate": 4.4009216265527464e-05, "loss": 7.0959, "step": 2100 }, { "epoch": 0.03783102143757881, "grad_norm": 0.27196934819221497, "learning_rate": 4.3850917000542085e-05, "loss": 7.0968, "step": 2110 }, { "epoch": 0.039632498648892095, "grad_norm": 0.26916953921318054, "learning_rate": 4.369212607944192e-05, "loss": 7.1006, "step": 2120 }, { "epoch": 0.04143397586020537, "grad_norm": 0.26671674847602844, "learning_rate": 4.353284913871495e-05, "loss": 7.1011, "step": 2130 }, { "epoch": 0.043235453071518645, "grad_norm": 0.2532482445240021, "learning_rate": 4.337309183210108e-05, "loss": 7.0903, "step": 2140 }, { "epoch": 0.04503693028283192, "grad_norm": 0.2542300224304199, "learning_rate": 4.321285983039138e-05, "loss": 7.094, "step": 2150 }, { "epoch": 0.0468384074941452, "grad_norm": 0.2545788288116455, "learning_rate": 4.305215882122685e-05, "loss": 7.0871, "step": 2160 }, { "epoch": 0.04863988470545848, "grad_norm": 0.260768324136734, "learning_rate": 4.2890994508896524e-05, "loss": 7.0893, "step": 2170 }, { "epoch": 0.05044136191677175, "grad_norm": 0.2566660940647125, "learning_rate": 4.272937261413495e-05, "loss": 7.0913, "step": 2180 }, { "epoch": 0.05224283912808503, "grad_norm": 0.26606038212776184, "learning_rate": 4.2567298873919185e-05, "loss": 7.0936, "step": 2190 }, { "epoch": 0.05404431633939831, "grad_norm": 0.2727970778942108, "learning_rate": 4.240477904126511e-05, "loss": 7.1015, "step": 2200 }, { "epoch": 0.055845793550711585, "grad_norm": 0.2565927803516388, "learning_rate": 4.224181888502322e-05, "loss": 7.0951, "step": 2210 }, { "epoch": 0.05764727076202486, "grad_norm": 0.26139333844184875, "learning_rate": 4.207842418967386e-05, "loss": 7.0986, "step": 2220 }, { "epoch": 0.059448747973338135, "grad_norm": 0.25254175066947937, "learning_rate": 4.191460075512192e-05, "loss": 7.0954, "step": 2230 }, { "epoch": 0.06125022518465142, "grad_norm": 0.2547784149646759, "learning_rate": 4.1750354396490925e-05, "loss": 7.0947, "step": 2240 }, { "epoch": 0.06305170239596469, "grad_norm": 0.2603592574596405, "learning_rate": 4.158569094391665e-05, "loss": 7.0916, "step": 2250 }, { "epoch": 0.06485317960727797, "grad_norm": 0.2585497200489044, "learning_rate": 4.142061624234012e-05, "loss": 7.0919, "step": 2260 }, { "epoch": 0.06665465681859124, "grad_norm": 0.2732134759426117, "learning_rate": 4.125513615130022e-05, "loss": 7.096, "step": 2270 }, { "epoch": 0.06845613402990453, "grad_norm": 0.267095685005188, "learning_rate": 4.108925654472562e-05, "loss": 7.1027, "step": 2280 }, { "epoch": 0.0702576112412178, "grad_norm": 0.2722208797931671, "learning_rate": 4.092298331072632e-05, "loss": 7.1041, "step": 2290 }, { "epoch": 0.07205908845253108, "grad_norm": 0.2778127193450928, "learning_rate": 4.0756322351384633e-05, "loss": 7.0991, "step": 2300 }, { "epoch": 0.07386056566384436, "grad_norm": 0.278670996427536, "learning_rate": 4.058927958254567e-05, "loss": 7.1068, "step": 2310 }, { "epoch": 0.07566204287515763, "grad_norm": 0.29091861844062805, "learning_rate": 4.042186093360739e-05, "loss": 7.1082, "step": 2320 }, { "epoch": 0.07746352008647091, "grad_norm": 0.271429181098938, "learning_rate": 4.0254072347310066e-05, "loss": 7.1129, "step": 2330 }, { "epoch": 0.07926499729778419, "grad_norm": 0.2803885042667389, "learning_rate": 4.008591977952539e-05, "loss": 7.1029, "step": 2340 }, { "epoch": 0.08106647450909746, "grad_norm": 0.2776201069355011, "learning_rate": 3.9917409199045055e-05, "loss": 7.1031, "step": 2350 }, { "epoch": 0.08286795172041074, "grad_norm": 0.2702827751636505, "learning_rate": 3.9748546587368844e-05, "loss": 7.0966, "step": 2360 }, { "epoch": 0.08466942893172401, "grad_norm": 0.28276774287223816, "learning_rate": 3.957933793849237e-05, "loss": 7.1037, "step": 2370 }, { "epoch": 0.08647090614303729, "grad_norm": 0.28798773884773254, "learning_rate": 3.940978925869428e-05, "loss": 7.1063, "step": 2380 }, { "epoch": 0.08827238335435057, "grad_norm": 0.29184576869010925, "learning_rate": 3.923990656632303e-05, "loss": 7.1065, "step": 2390 }, { "epoch": 0.09007386056566384, "grad_norm": 0.28556135296821594, "learning_rate": 3.90696958915833e-05, "loss": 7.1116, "step": 2400 }, { "epoch": 0.09187533777697712, "grad_norm": 0.290779709815979, "learning_rate": 3.8899163276321935e-05, "loss": 7.1106, "step": 2410 }, { "epoch": 0.0936768149882904, "grad_norm": 0.286338210105896, "learning_rate": 3.872831477381345e-05, "loss": 7.1112, "step": 2420 }, { "epoch": 0.09547829219960367, "grad_norm": 0.2762143313884735, "learning_rate": 3.855715644854521e-05, "loss": 7.115, "step": 2430 }, { "epoch": 0.09727976941091696, "grad_norm": 0.2853328287601471, "learning_rate": 3.838569437600212e-05, "loss": 7.1133, "step": 2440 }, { "epoch": 0.09908124662223022, "grad_norm": 0.2878759801387787, "learning_rate": 3.821393464245099e-05, "loss": 7.1107, "step": 2450 }, { "epoch": 0.1008827238335435, "grad_norm": 0.2866576015949249, "learning_rate": 3.804188334472448e-05, "loss": 7.1127, "step": 2460 }, { "epoch": 0.10268420104485679, "grad_norm": 0.2897520065307617, "learning_rate": 3.7869546590004716e-05, "loss": 7.1127, "step": 2470 }, { "epoch": 0.10448567825617006, "grad_norm": 0.287649005651474, "learning_rate": 3.7696930495606476e-05, "loss": 7.1126, "step": 2480 }, { "epoch": 0.10628715546748334, "grad_norm": 0.2783564329147339, "learning_rate": 3.752404118876005e-05, "loss": 7.1129, "step": 2490 }, { "epoch": 0.10808863267879662, "grad_norm": 0.27361151576042175, "learning_rate": 3.7350884806393775e-05, "loss": 7.1079, "step": 2500 }, { "epoch": 0.10989010989010989, "grad_norm": 0.25903868675231934, "learning_rate": 3.717746749491617e-05, "loss": 7.1021, "step": 2510 }, { "epoch": 0.11169158710142317, "grad_norm": 0.25960326194763184, "learning_rate": 3.7003795409997746e-05, "loss": 7.099, "step": 2520 }, { "epoch": 0.11349306431273644, "grad_norm": 0.24807259440422058, "learning_rate": 3.682987471635256e-05, "loss": 7.092, "step": 2530 }, { "epoch": 0.11529454152404972, "grad_norm": 0.2572445571422577, "learning_rate": 3.6655711587519344e-05, "loss": 7.0931, "step": 2540 }, { "epoch": 0.117096018735363, "grad_norm": 0.282367080450058, "learning_rate": 3.648131220564236e-05, "loss": 7.1032, "step": 2550 }, { "epoch": 0.11889749594667627, "grad_norm": 0.27821698784828186, "learning_rate": 3.6306682761252e-05, "loss": 7.1028, "step": 2560 }, { "epoch": 0.12069897315798955, "grad_norm": 0.27028584480285645, "learning_rate": 3.613182945304501e-05, "loss": 7.1013, "step": 2570 }, { "epoch": 0.12250045036930284, "grad_norm": 0.2562548816204071, "learning_rate": 3.595675848766446e-05, "loss": 7.0939, "step": 2580 }, { "epoch": 0.1243019275806161, "grad_norm": 0.25729700922966003, "learning_rate": 3.578147607947948e-05, "loss": 7.0929, "step": 2590 }, { "epoch": 0.12610340479192939, "grad_norm": 0.23584581911563873, "learning_rate": 3.560598845036458e-05, "loss": 7.0781, "step": 2600 }, { "epoch": 0.12790488200324265, "grad_norm": 0.2478930801153183, "learning_rate": 3.543030182947888e-05, "loss": 7.0796, "step": 2610 }, { "epoch": 0.12970635921455595, "grad_norm": 0.23921333253383636, "learning_rate": 3.525442245304498e-05, "loss": 7.0775, "step": 2620 }, { "epoch": 0.13150783642586922, "grad_norm": 0.24996818602085114, "learning_rate": 3.5078356564127574e-05, "loss": 7.0842, "step": 2630 }, { "epoch": 0.13330931363718249, "grad_norm": 0.2454964816570282, "learning_rate": 3.490211041241183e-05, "loss": 7.0807, "step": 2640 }, { "epoch": 0.13511079084849575, "grad_norm": 0.2454310804605484, "learning_rate": 3.4725690253981634e-05, "loss": 7.0833, "step": 2650 }, { "epoch": 0.13691226805980905, "grad_norm": 0.24396756291389465, "learning_rate": 3.454910235109741e-05, "loss": 7.0829, "step": 2660 }, { "epoch": 0.13871374527112232, "grad_norm": 0.24686896800994873, "learning_rate": 3.437235297197392e-05, "loss": 7.0848, "step": 2670 }, { "epoch": 0.1405152224824356, "grad_norm": 0.2439989149570465, "learning_rate": 3.419544839055771e-05, "loss": 7.0827, "step": 2680 }, { "epoch": 0.14231669969374888, "grad_norm": 0.2449413537979126, "learning_rate": 3.4018394886304454e-05, "loss": 7.0816, "step": 2690 }, { "epoch": 0.14411817690506215, "grad_norm": 0.23497846722602844, "learning_rate": 3.384119874395601e-05, "loss": 7.0842, "step": 2700 }, { "epoch": 0.14591965411637542, "grad_norm": 0.25669828057289124, "learning_rate": 3.366386625331738e-05, "loss": 7.0886, "step": 2710 }, { "epoch": 0.14772113132768872, "grad_norm": 0.25092875957489014, "learning_rate": 3.348640370903341e-05, "loss": 7.0886, "step": 2720 }, { "epoch": 0.14952260853900198, "grad_norm": 0.2566509246826172, "learning_rate": 3.3308817410365374e-05, "loss": 7.0884, "step": 2730 }, { "epoch": 0.15132408575031525, "grad_norm": 0.2686420977115631, "learning_rate": 3.313111366096737e-05, "loss": 7.0933, "step": 2740 }, { "epoch": 0.15312556296162855, "grad_norm": 0.2604024112224579, "learning_rate": 3.295329876866255e-05, "loss": 7.092, "step": 2750 }, { "epoch": 0.15492704017294182, "grad_norm": 0.26000502705574036, "learning_rate": 3.277537904521922e-05, "loss": 7.0877, "step": 2760 }, { "epoch": 0.15672851738425508, "grad_norm": 0.26063740253448486, "learning_rate": 3.259736080612684e-05, "loss": 7.0902, "step": 2770 }, { "epoch": 0.15852999459556838, "grad_norm": 0.25636982917785645, "learning_rate": 3.241925037037176e-05, "loss": 7.0893, "step": 2780 }, { "epoch": 0.16033147180688165, "grad_norm": 0.24808195233345032, "learning_rate": 3.224105406021301e-05, "loss": 7.0856, "step": 2790 }, { "epoch": 0.16213294901819492, "grad_norm": 0.2557581663131714, "learning_rate": 3.206277820095782e-05, "loss": 7.0915, "step": 2800 }, { "epoch": 0.16393442622950818, "grad_norm": 0.26228415966033936, "learning_rate": 3.188442912073712e-05, "loss": 7.0939, "step": 2810 }, { "epoch": 0.16573590344082148, "grad_norm": 0.2565348446369171, "learning_rate": 3.170601315028092e-05, "loss": 7.0889, "step": 2820 }, { "epoch": 0.16753738065213475, "grad_norm": 0.27181676030158997, "learning_rate": 3.152753662269357e-05, "loss": 7.0905, "step": 2830 }, { "epoch": 0.16933885786344802, "grad_norm": 0.26413819193840027, "learning_rate": 3.1349005873229e-05, "loss": 7.0934, "step": 2840 }, { "epoch": 0.1711403350747613, "grad_norm": 0.2645306885242462, "learning_rate": 3.117042723906578e-05, "loss": 7.0911, "step": 2850 }, { "epoch": 0.17294181228607458, "grad_norm": 0.2686319053173065, "learning_rate": 3.099180705908226e-05, "loss": 7.0945, "step": 2860 }, { "epoch": 0.17474328949738785, "grad_norm": 0.247956782579422, "learning_rate": 3.0813151673631436e-05, "loss": 7.085, "step": 2870 }, { "epoch": 0.17654476670870115, "grad_norm": 0.24497249722480774, "learning_rate": 3.0634467424316035e-05, "loss": 7.085, "step": 2880 }, { "epoch": 0.1783462439200144, "grad_norm": 0.25811126828193665, "learning_rate": 3.0455760653763334e-05, "loss": 7.0839, "step": 2890 }, { "epoch": 0.18014772113132768, "grad_norm": 0.2580315172672272, "learning_rate": 3.027703770539999e-05, "loss": 7.0862, "step": 2900 }, { "epoch": 0.18194919834264098, "grad_norm": 0.2621557414531708, "learning_rate": 3.0098304923226953e-05, "loss": 7.0907, "step": 2910 }, { "epoch": 0.18375067555395425, "grad_norm": 0.24736247956752777, "learning_rate": 2.9919568651594223e-05, "loss": 7.0872, "step": 2920 }, { "epoch": 0.18555215276526751, "grad_norm": 0.2591515779495239, "learning_rate": 2.9740835234975668e-05, "loss": 7.0879, "step": 2930 }, { "epoch": 0.1873536299765808, "grad_norm": 0.2600081264972687, "learning_rate": 2.9562111017743788e-05, "loss": 7.0925, "step": 2940 }, { "epoch": 0.18915510718789408, "grad_norm": 0.25504061579704285, "learning_rate": 2.938340234394457e-05, "loss": 7.092, "step": 2950 }, { "epoch": 0.19095658439920735, "grad_norm": 0.24798712134361267, "learning_rate": 2.920471555707226e-05, "loss": 7.0861, "step": 2960 }, { "epoch": 0.19275806161052061, "grad_norm": 0.24428805708885193, "learning_rate": 2.9026056999844196e-05, "loss": 7.0877, "step": 2970 }, { "epoch": 0.1945595388218339, "grad_norm": 0.2592213451862335, "learning_rate": 2.8847433013975662e-05, "loss": 7.0886, "step": 2980 }, { "epoch": 0.19636101603314718, "grad_norm": 0.24871060252189636, "learning_rate": 2.8668849939954802e-05, "loss": 7.0874, "step": 2990 }, { "epoch": 0.19816249324446045, "grad_norm": 0.25240686535835266, "learning_rate": 2.8490314116817518e-05, "loss": 7.0866, "step": 3000 }, { "epoch": 0.19996397045577374, "grad_norm": 0.23895671963691711, "learning_rate": 2.8311831881922506e-05, "loss": 7.0846, "step": 3010 }, { "epoch": 0.201765447667087, "grad_norm": 0.2547496557235718, "learning_rate": 2.813340957072626e-05, "loss": 7.0828, "step": 3020 }, { "epoch": 0.20356692487840028, "grad_norm": 0.2456449419260025, "learning_rate": 2.7955053516558222e-05, "loss": 7.0807, "step": 3030 }, { "epoch": 0.20536840208971358, "grad_norm": 0.24654389917850494, "learning_rate": 2.777677005039594e-05, "loss": 7.0829, "step": 3040 }, { "epoch": 0.20716987930102684, "grad_norm": 0.24519839882850647, "learning_rate": 2.7598565500640373e-05, "loss": 7.0846, "step": 3050 }, { "epoch": 0.2089713565123401, "grad_norm": 0.24315975606441498, "learning_rate": 2.7420446192891217e-05, "loss": 7.0829, "step": 3060 }, { "epoch": 0.2107728337236534, "grad_norm": 0.24274303019046783, "learning_rate": 2.724241844972242e-05, "loss": 7.0839, "step": 3070 }, { "epoch": 0.21257431093496668, "grad_norm": 0.24508360028266907, "learning_rate": 2.7064488590457708e-05, "loss": 7.0847, "step": 3080 }, { "epoch": 0.21437578814627994, "grad_norm": 0.23906520009040833, "learning_rate": 2.6886662930946306e-05, "loss": 7.0809, "step": 3090 }, { "epoch": 0.21617726535759324, "grad_norm": 0.2307758331298828, "learning_rate": 2.670894778333872e-05, "loss": 7.0773, "step": 3100 }, { "epoch": 0.2179787425689065, "grad_norm": 0.2300621122121811, "learning_rate": 2.6531349455862716e-05, "loss": 7.0693, "step": 3110 }, { "epoch": 0.21978021978021978, "grad_norm": 0.2338068187236786, "learning_rate": 2.6353874252599347e-05, "loss": 7.0694, "step": 3120 }, { "epoch": 0.22158169699153304, "grad_norm": 0.26211997866630554, "learning_rate": 2.617652847325924e-05, "loss": 7.0803, "step": 3130 }, { "epoch": 0.22338317420284634, "grad_norm": 0.2541845142841339, "learning_rate": 2.599931841295895e-05, "loss": 7.0909, "step": 3140 }, { "epoch": 0.2251846514141596, "grad_norm": 0.2547355592250824, "learning_rate": 2.58222503619975e-05, "loss": 7.0931, "step": 3150 }, { "epoch": 0.22698612862547288, "grad_norm": 0.2507126033306122, "learning_rate": 2.5645330605633103e-05, "loss": 7.0865, "step": 3160 }, { "epoch": 0.22878760583678617, "grad_norm": 0.2500768005847931, "learning_rate": 2.5468565423860108e-05, "loss": 7.0847, "step": 3170 }, { "epoch": 0.23058908304809944, "grad_norm": 0.25642552971839905, "learning_rate": 2.529196109118597e-05, "loss": 7.0922, "step": 3180 }, { "epoch": 0.2323905602594127, "grad_norm": 0.2595052421092987, "learning_rate": 2.5115523876408663e-05, "loss": 7.0891, "step": 3190 }, { "epoch": 0.234192037470726, "grad_norm": 0.2502889931201935, "learning_rate": 2.493926004239405e-05, "loss": 7.0904, "step": 3200 }, { "epoch": 0.23599351468203927, "grad_norm": 0.24289515614509583, "learning_rate": 2.4763175845853656e-05, "loss": 7.0825, "step": 3210 }, { "epoch": 0.23779499189335254, "grad_norm": 0.24600744247436523, "learning_rate": 2.4587277537122516e-05, "loss": 7.0833, "step": 3220 }, { "epoch": 0.23959646910466584, "grad_norm": 0.2696186304092407, "learning_rate": 2.441157135993736e-05, "loss": 7.0867, "step": 3230 }, { "epoch": 0.2413979463159791, "grad_norm": 0.260602205991745, "learning_rate": 2.4236063551214932e-05, "loss": 7.0886, "step": 3240 }, { "epoch": 0.24319942352729237, "grad_norm": 0.2656106650829315, "learning_rate": 2.4060760340830675e-05, "loss": 7.0934, "step": 3250 }, { "epoch": 0.24500090073860567, "grad_norm": 0.2694871723651886, "learning_rate": 2.3885667951397503e-05, "loss": 7.0939, "step": 3260 }, { "epoch": 0.24680237794991894, "grad_norm": 0.2643691301345825, "learning_rate": 2.3710792598045012e-05, "loss": 7.0969, "step": 3270 }, { "epoch": 0.2486038551612322, "grad_norm": 0.24956071376800537, "learning_rate": 2.3536140488198792e-05, "loss": 7.0907, "step": 3280 }, { "epoch": 0.2504053323725455, "grad_norm": 0.26689502596855164, "learning_rate": 2.3361717821360144e-05, "loss": 7.0924, "step": 3290 }, { "epoch": 0.25220680958385877, "grad_norm": 0.2596072852611542, "learning_rate": 2.3187530788885958e-05, "loss": 7.0906, "step": 3300 }, { "epoch": 0.25400828679517207, "grad_norm": 0.2621026039123535, "learning_rate": 2.3013585573769017e-05, "loss": 7.0913, "step": 3310 }, { "epoch": 0.2558097640064853, "grad_norm": 0.2543656527996063, "learning_rate": 2.2839888350418467e-05, "loss": 7.0927, "step": 3320 }, { "epoch": 0.2576112412177986, "grad_norm": 0.2571466863155365, "learning_rate": 2.266644528444067e-05, "loss": 7.0918, "step": 3330 }, { "epoch": 0.2594127184291119, "grad_norm": 0.25938165187835693, "learning_rate": 2.2493262532420336e-05, "loss": 7.0957, "step": 3340 }, { "epoch": 0.26121419564042514, "grad_norm": 0.2550225555896759, "learning_rate": 2.2320346241702034e-05, "loss": 7.0915, "step": 3350 }, { "epoch": 0.26301567285173844, "grad_norm": 0.24893473088741302, "learning_rate": 2.2147702550171884e-05, "loss": 7.0837, "step": 3360 }, { "epoch": 0.2648171500630517, "grad_norm": 0.25547170639038086, "learning_rate": 2.1975337586039796e-05, "loss": 7.085, "step": 3370 }, { "epoch": 0.26661862727436497, "grad_norm": 0.23566247522830963, "learning_rate": 2.180325746762188e-05, "loss": 7.0795, "step": 3380 }, { "epoch": 0.26842010448567827, "grad_norm": 0.2557154595851898, "learning_rate": 2.1631468303123283e-05, "loss": 7.0846, "step": 3390 }, { "epoch": 0.2702215816969915, "grad_norm": 0.24930061399936676, "learning_rate": 2.1459976190421337e-05, "loss": 7.085, "step": 3400 }, { "epoch": 0.2720230589083048, "grad_norm": 0.24489817023277283, "learning_rate": 2.128878721684921e-05, "loss": 7.0831, "step": 3410 }, { "epoch": 0.2738245361196181, "grad_norm": 0.24336157739162445, "learning_rate": 2.1117907458979665e-05, "loss": 7.0903, "step": 3420 }, { "epoch": 0.27562601333093134, "grad_norm": 0.25700876116752625, "learning_rate": 2.0947342982409537e-05, "loss": 7.0882, "step": 3430 }, { "epoch": 0.27742749054224464, "grad_norm": 0.25534772872924805, "learning_rate": 2.0777099841544298e-05, "loss": 7.088, "step": 3440 }, { "epoch": 0.27922896775355793, "grad_norm": 0.24592944979667664, "learning_rate": 2.060718407938322e-05, "loss": 7.0885, "step": 3450 }, { "epoch": 0.2810304449648712, "grad_norm": 0.23909662663936615, "learning_rate": 2.0437601727304825e-05, "loss": 7.0815, "step": 3460 }, { "epoch": 0.28283192217618447, "grad_norm": 0.23884309828281403, "learning_rate": 2.0268358804852823e-05, "loss": 7.0799, "step": 3470 }, { "epoch": 0.28463339938749777, "grad_norm": 0.24644611775875092, "learning_rate": 2.009946131952245e-05, "loss": 7.0808, "step": 3480 }, { "epoch": 0.286434876598811, "grad_norm": 0.23646117746829987, "learning_rate": 1.993091526654718e-05, "loss": 7.0752, "step": 3490 }, { "epoch": 0.2882363538101243, "grad_norm": 0.23513910174369812, "learning_rate": 1.9762726628685977e-05, "loss": 7.0771, "step": 3500 }, { "epoch": 0.2900378310214376, "grad_norm": 0.24085739254951477, "learning_rate": 1.959490137601085e-05, "loss": 7.0808, "step": 3510 }, { "epoch": 0.29183930823275084, "grad_norm": 0.23842355608940125, "learning_rate": 1.942744546569503e-05, "loss": 7.0835, "step": 3520 }, { "epoch": 0.29364078544406413, "grad_norm": 0.2464919239282608, "learning_rate": 1.926036484180143e-05, "loss": 7.0827, "step": 3530 }, { "epoch": 0.29544226265537743, "grad_norm": 0.24027657508850098, "learning_rate": 1.9093665435071715e-05, "loss": 7.0798, "step": 3540 }, { "epoch": 0.29724373986669067, "grad_norm": 0.23790228366851807, "learning_rate": 1.892735316271574e-05, "loss": 7.0809, "step": 3550 }, { "epoch": 0.29904521707800397, "grad_norm": 0.2514890730381012, "learning_rate": 1.8761433928201543e-05, "loss": 7.0854, "step": 3560 }, { "epoch": 0.30084669428931726, "grad_norm": 0.25048115849494934, "learning_rate": 1.8595913621045748e-05, "loss": 7.0884, "step": 3570 }, { "epoch": 0.3026481715006305, "grad_norm": 0.24307414889335632, "learning_rate": 1.8430798116604576e-05, "loss": 7.0886, "step": 3580 }, { "epoch": 0.3044496487119438, "grad_norm": 0.24048565328121185, "learning_rate": 1.826609327586523e-05, "loss": 7.0896, "step": 3590 }, { "epoch": 0.3062511259232571, "grad_norm": 0.24997974932193756, "learning_rate": 1.810180494523789e-05, "loss": 7.09, "step": 3600 }, { "epoch": 0.30805260313457034, "grad_norm": 0.251874715089798, "learning_rate": 1.7937938956348164e-05, "loss": 7.0866, "step": 3610 }, { "epoch": 0.30985408034588363, "grad_norm": 0.2485278695821762, "learning_rate": 1.7774501125830112e-05, "loss": 7.0856, "step": 3620 }, { "epoch": 0.3116555575571969, "grad_norm": 0.25206252932548523, "learning_rate": 1.7611497255119735e-05, "loss": 7.0859, "step": 3630 }, { "epoch": 0.31345703476851017, "grad_norm": 0.24933861196041107, "learning_rate": 1.7448933130249112e-05, "loss": 7.0877, "step": 3640 }, { "epoch": 0.31525851197982346, "grad_norm": 0.25165891647338867, "learning_rate": 1.7286814521640918e-05, "loss": 7.085, "step": 3650 }, { "epoch": 0.31705998919113676, "grad_norm": 0.24684573709964752, "learning_rate": 1.7125147183903726e-05, "loss": 7.0865, "step": 3660 }, { "epoch": 0.31886146640245, "grad_norm": 0.2446172535419464, "learning_rate": 1.6963936855627625e-05, "loss": 7.0866, "step": 3670 }, { "epoch": 0.3206629436137633, "grad_norm": 0.25463560223579407, "learning_rate": 1.680318925918057e-05, "loss": 7.0848, "step": 3680 }, { "epoch": 0.32246442082507654, "grad_norm": 0.25411030650138855, "learning_rate": 1.6642910100505257e-05, "loss": 7.0834, "step": 3690 }, { "epoch": 0.32426589803638983, "grad_norm": 0.250730961561203, "learning_rate": 1.648310506891661e-05, "loss": 7.0894, "step": 3700 }, { "epoch": 0.32606737524770313, "grad_norm": 0.249891996383667, "learning_rate": 1.632377983689973e-05, "loss": 7.0853, "step": 3710 }, { "epoch": 0.32786885245901637, "grad_norm": 0.25684821605682373, "learning_rate": 1.61649400599087e-05, "loss": 7.0843, "step": 3720 }, { "epoch": 0.32967032967032966, "grad_norm": 0.2295835167169571, "learning_rate": 1.60065913761657e-05, "loss": 7.0751, "step": 3730 }, { "epoch": 0.33147180688164296, "grad_norm": 0.24857565760612488, "learning_rate": 1.5848739406460933e-05, "loss": 7.0794, "step": 3740 }, { "epoch": 0.3332732840929562, "grad_norm": 0.24784788489341736, "learning_rate": 1.5691389753953097e-05, "loss": 7.0816, "step": 3750 }, { "epoch": 0.3350747613042695, "grad_norm": 0.25636962056159973, "learning_rate": 1.553454800397053e-05, "loss": 7.0818, "step": 3760 }, { "epoch": 0.3368762385155828, "grad_norm": 0.25425460934638977, "learning_rate": 1.5378219723812835e-05, "loss": 7.0853, "step": 3770 }, { "epoch": 0.33867771572689603, "grad_norm": 0.23544788360595703, "learning_rate": 1.5222410462553445e-05, "loss": 7.0798, "step": 3780 }, { "epoch": 0.34047919293820933, "grad_norm": 0.25861406326293945, "learning_rate": 1.5067125750842475e-05, "loss": 7.0878, "step": 3790 }, { "epoch": 0.3422806701495226, "grad_norm": 0.2505762577056885, "learning_rate": 1.4912371100710509e-05, "loss": 7.0853, "step": 3800 }, { "epoch": 0.34408214736083587, "grad_norm": 0.25163373351097107, "learning_rate": 1.4758152005372888e-05, "loss": 7.0869, "step": 3810 }, { "epoch": 0.34588362457214916, "grad_norm": 0.25199687480926514, "learning_rate": 1.4604473939034815e-05, "loss": 7.0903, "step": 3820 }, { "epoch": 0.34768510178346246, "grad_norm": 0.2454589456319809, "learning_rate": 1.4451342356696867e-05, "loss": 7.0871, "step": 3830 }, { "epoch": 0.3494865789947757, "grad_norm": 0.25862643122673035, "learning_rate": 1.4298762693961561e-05, "loss": 7.0928, "step": 3840 }, { "epoch": 0.351288056206089, "grad_norm": 0.2593483626842499, "learning_rate": 1.414674036684028e-05, "loss": 7.0931, "step": 3850 }, { "epoch": 0.3530895334174023, "grad_norm": 0.25914037227630615, "learning_rate": 1.3995280771561055e-05, "loss": 7.0885, "step": 3860 }, { "epoch": 0.35489101062871553, "grad_norm": 0.25977805256843567, "learning_rate": 1.3844389284377028e-05, "loss": 7.0897, "step": 3870 }, { "epoch": 0.3566924878400288, "grad_norm": 0.24812103807926178, "learning_rate": 1.369407126137566e-05, "loss": 7.0871, "step": 3880 }, { "epoch": 0.3584939650513421, "grad_norm": 0.26340484619140625, "learning_rate": 1.3544332038288479e-05, "loss": 7.09, "step": 3890 }, { "epoch": 0.36029544226265536, "grad_norm": 0.25631335377693176, "learning_rate": 1.339517693030184e-05, "loss": 7.0927, "step": 3900 }, { "epoch": 0.36209691947396866, "grad_norm": 0.26711371541023254, "learning_rate": 1.3246611231868147e-05, "loss": 7.0952, "step": 3910 }, { "epoch": 0.36389839668528196, "grad_norm": 0.2552339732646942, "learning_rate": 1.309864021651796e-05, "loss": 7.0941, "step": 3920 }, { "epoch": 0.3656998738965952, "grad_norm": 0.24878251552581787, "learning_rate": 1.295126913667278e-05, "loss": 7.0963, "step": 3930 }, { "epoch": 0.3675013511079085, "grad_norm": 0.25014087557792664, "learning_rate": 1.2804503223458686e-05, "loss": 7.0932, "step": 3940 }, { "epoch": 0.3693028283192218, "grad_norm": 0.26266229152679443, "learning_rate": 1.2658347686520496e-05, "loss": 7.0938, "step": 3950 }, { "epoch": 0.37110430553053503, "grad_norm": 0.25358816981315613, "learning_rate": 1.2512807713837022e-05, "loss": 7.0926, "step": 3960 }, { "epoch": 0.3729057827418483, "grad_norm": 0.25957873463630676, "learning_rate": 1.2367888471536785e-05, "loss": 7.0904, "step": 3970 }, { "epoch": 0.3747072599531616, "grad_norm": 0.260268896818161, "learning_rate": 1.2223595103714696e-05, "loss": 7.0933, "step": 3980 }, { "epoch": 0.37650873716447486, "grad_norm": 0.26004722714424133, "learning_rate": 1.2079932732249429e-05, "loss": 7.0938, "step": 3990 }, { "epoch": 0.37831021437578816, "grad_norm": 0.25877878069877625, "learning_rate": 1.1936906456621685e-05, "loss": 7.0926, "step": 4000 }, { "epoch": 0.3801116915871014, "grad_norm": 0.25450918078422546, "learning_rate": 1.1794521353733035e-05, "loss": 7.0946, "step": 4010 }, { "epoch": 0.3819131687984147, "grad_norm": 0.2625286877155304, "learning_rate": 1.165278247772589e-05, "loss": 7.0928, "step": 4020 }, { "epoch": 0.383714646009728, "grad_norm": 0.2435232251882553, "learning_rate": 1.151169485980395e-05, "loss": 7.0894, "step": 4030 }, { "epoch": 0.38551612322104123, "grad_norm": 0.24893513321876526, "learning_rate": 1.137126350805369e-05, "loss": 7.0892, "step": 4040 }, { "epoch": 0.3873176004323545, "grad_norm": 0.25459447503089905, "learning_rate": 1.1231493407266563e-05, "loss": 7.0884, "step": 4050 }, { "epoch": 0.3891190776436678, "grad_norm": 0.2565847337245941, "learning_rate": 1.1092389518762105e-05, "loss": 7.0891, "step": 4060 }, { "epoch": 0.39092055485498106, "grad_norm": 0.24984680116176605, "learning_rate": 1.0953956780211724e-05, "loss": 7.0843, "step": 4070 }, { "epoch": 0.39272203206629436, "grad_norm": 0.2500856816768646, "learning_rate": 1.0816200105463564e-05, "loss": 7.0885, "step": 4080 }, { "epoch": 0.39452350927760765, "grad_norm": 0.25074025988578796, "learning_rate": 1.0679124384367976e-05, "loss": 7.0893, "step": 4090 }, { "epoch": 0.3963249864889209, "grad_norm": 0.24879977107048035, "learning_rate": 1.0542734482603992e-05, "loss": 7.0884, "step": 4100 }, { "epoch": 0.3981264637002342, "grad_norm": 0.27129289507865906, "learning_rate": 1.04070352415066e-05, "loss": 7.0917, "step": 4110 }, { "epoch": 0.3999279409115475, "grad_norm": 0.2533853352069855, "learning_rate": 1.027203147789494e-05, "loss": 7.0953, "step": 4120 }, { "epoch": 0.4017294181228607, "grad_norm": 0.2533014714717865, "learning_rate": 1.0137727983901207e-05, "loss": 7.0919, "step": 4130 }, { "epoch": 0.403530895334174, "grad_norm": 0.25204992294311523, "learning_rate": 1.0004129526800699e-05, "loss": 7.0898, "step": 4140 }, { "epoch": 0.4053323725454873, "grad_norm": 0.2648032009601593, "learning_rate": 9.871240848842495e-06, "loss": 7.0966, "step": 4150 }, { "epoch": 0.40713384975680056, "grad_norm": 0.2555524408817291, "learning_rate": 9.739066667081123e-06, "loss": 7.0937, "step": 4160 }, { "epoch": 0.40893532696811385, "grad_norm": 0.25934886932373047, "learning_rate": 9.60761167320919e-06, "loss": 7.0957, "step": 4170 }, { "epoch": 0.41073680417942715, "grad_norm": 0.24540553987026215, "learning_rate": 9.476880533390793e-06, "loss": 7.0938, "step": 4180 }, { "epoch": 0.4125382813907404, "grad_norm": 0.24666236340999603, "learning_rate": 9.346877888095892e-06, "loss": 7.0825, "step": 4190 }, { "epoch": 0.4143397586020537, "grad_norm": 0.24191080033779144, "learning_rate": 9.217608351935581e-06, "loss": 7.0874, "step": 4200 }, { "epoch": 0.416141235813367, "grad_norm": 0.23266373574733734, "learning_rate": 9.089076513498356e-06, "loss": 7.0793, "step": 4210 }, { "epoch": 0.4179427130246802, "grad_norm": 0.2409142106771469, "learning_rate": 8.961286935187103e-06, "loss": 7.0782, "step": 4220 }, { "epoch": 0.4197441902359935, "grad_norm": 0.23587916791439056, "learning_rate": 8.834244153057312e-06, "loss": 7.0759, "step": 4230 }, { "epoch": 0.4215456674473068, "grad_norm": 0.21966825425624847, "learning_rate": 8.707952676655923e-06, "loss": 7.0712, "step": 4240 }, { "epoch": 0.42334714465862006, "grad_norm": 0.22979901731014252, "learning_rate": 8.582416988861348e-06, "loss": 7.0717, "step": 4250 }, { "epoch": 0.42514862186993335, "grad_norm": 0.24065175652503967, "learning_rate": 8.457641545724275e-06, "loss": 7.0698, "step": 4260 }, { "epoch": 0.42695009908124665, "grad_norm": 0.23396599292755127, "learning_rate": 8.333630776309587e-06, "loss": 7.0766, "step": 4270 }, { "epoch": 0.4287515762925599, "grad_norm": 0.22919942438602448, "learning_rate": 8.210389082539016e-06, "loss": 7.0741, "step": 4280 }, { "epoch": 0.4305530535038732, "grad_norm": 0.24337969720363617, "learning_rate": 8.087920839035029e-06, "loss": 7.0759, "step": 4290 }, { "epoch": 0.4323545307151865, "grad_norm": 0.23486100137233734, "learning_rate": 7.966230392965444e-06, "loss": 7.076, "step": 4300 }, { "epoch": 0.4341560079264997, "grad_norm": 0.22724387049674988, "learning_rate": 7.845322063889174e-06, "loss": 7.0715, "step": 4310 }, { "epoch": 0.435957485137813, "grad_norm": 0.22696970403194427, "learning_rate": 7.725200143602866e-06, "loss": 7.0696, "step": 4320 }, { "epoch": 0.43775896234912626, "grad_norm": 0.2250368446111679, "learning_rate": 7.605868895988618e-06, "loss": 7.0661, "step": 4330 }, { "epoch": 0.43956043956043955, "grad_norm": 0.2187509685754776, "learning_rate": 7.4873325568625195e-06, "loss": 7.066, "step": 4340 }, { "epoch": 0.44136191677175285, "grad_norm": 0.22752021253108978, "learning_rate": 7.369595333824419e-06, "loss": 7.0671, "step": 4350 }, { "epoch": 0.4431633939830661, "grad_norm": 0.2048732042312622, "learning_rate": 7.252661406108483e-06, "loss": 7.0599, "step": 4360 }, { "epoch": 0.4449648711943794, "grad_norm": 0.2091006189584732, "learning_rate": 7.136534924434884e-06, "loss": 7.0596, "step": 4370 }, { "epoch": 0.4467663484056927, "grad_norm": 0.21846379339694977, "learning_rate": 7.021220010862445e-06, "loss": 7.0658, "step": 4380 }, { "epoch": 0.4485678256170059, "grad_norm": 0.22598609328269958, "learning_rate": 6.906720758642375e-06, "loss": 7.0684, "step": 4390 }, { "epoch": 0.4503693028283192, "grad_norm": 0.22264251112937927, "learning_rate": 6.793041232072874e-06, "loss": 7.0669, "step": 4400 }, { "epoch": 0.4521707800396325, "grad_norm": 0.22255371510982513, "learning_rate": 6.680185466354984e-06, "loss": 7.0668, "step": 4410 }, { "epoch": 0.45397225725094575, "grad_norm": 0.2280287891626358, "learning_rate": 6.568157467449266e-06, "loss": 7.0725, "step": 4420 }, { "epoch": 0.45577373446225905, "grad_norm": 0.2370598465204239, "learning_rate": 6.4569612119336295e-06, "loss": 7.0734, "step": 4430 }, { "epoch": 0.45757521167357235, "grad_norm": 0.23198629915714264, "learning_rate": 6.346600646862192e-06, "loss": 7.0731, "step": 4440 }, { "epoch": 0.4593766888848856, "grad_norm": 0.22905012965202332, "learning_rate": 6.237079689625182e-06, "loss": 7.076, "step": 4450 }, { "epoch": 0.4611781660961989, "grad_norm": 0.22975753247737885, "learning_rate": 6.128402227809814e-06, "loss": 7.0768, "step": 4460 }, { "epoch": 0.4629796433075122, "grad_norm": 0.23542596399784088, "learning_rate": 6.020572119062412e-06, "loss": 7.077, "step": 4470 }, { "epoch": 0.4647811205188254, "grad_norm": 0.23200629651546478, "learning_rate": 5.913593190951374e-06, "loss": 7.075, "step": 4480 }, { "epoch": 0.4665825977301387, "grad_norm": 0.23202374577522278, "learning_rate": 5.807469240831358e-06, "loss": 7.0591, "step": 4490 }, { "epoch": 0.468384074941452, "grad_norm": 0.22130438685417175, "learning_rate": 5.702204035708461e-06, "loss": 7.0587, "step": 4500 }, { "epoch": 0.47018555215276525, "grad_norm": 0.22268489003181458, "learning_rate": 5.597801312106568e-06, "loss": 7.0613, "step": 4510 }, { "epoch": 0.47198702936407855, "grad_norm": 0.21746228635311127, "learning_rate": 5.494264775934608e-06, "loss": 7.0625, "step": 4520 }, { "epoch": 0.47378850657539184, "grad_norm": 0.21862652897834778, "learning_rate": 5.391598102355129e-06, "loss": 7.065, "step": 4530 }, { "epoch": 0.4755899837867051, "grad_norm": 0.22985348105430603, "learning_rate": 5.289804935653762e-06, "loss": 7.0642, "step": 4540 }, { "epoch": 0.4773914609980184, "grad_norm": 0.2334289699792862, "learning_rate": 5.188888889109883e-06, "loss": 7.0665, "step": 4550 }, { "epoch": 0.4791929382093317, "grad_norm": 0.2367192506790161, "learning_rate": 5.08885354486837e-06, "loss": 7.0669, "step": 4560 }, { "epoch": 0.4809944154206449, "grad_norm": 0.23439788818359375, "learning_rate": 4.989702453812458e-06, "loss": 7.0639, "step": 4570 }, { "epoch": 0.4827958926319582, "grad_norm": 0.22463025152683258, "learning_rate": 4.891439135437624e-06, "loss": 7.06, "step": 4580 }, { "epoch": 0.4845973698432715, "grad_norm": 0.22817419469356537, "learning_rate": 4.794067077726775e-06, "loss": 7.0651, "step": 4590 }, { "epoch": 0.48639884705458475, "grad_norm": 0.22841177880764008, "learning_rate": 4.697589737026328e-06, "loss": 7.0648, "step": 4600 }, { "epoch": 0.48820032426589804, "grad_norm": 0.2334153801202774, "learning_rate": 4.602010537923597e-06, "loss": 7.064, "step": 4610 }, { "epoch": 0.49000180147721134, "grad_norm": 0.23840239644050598, "learning_rate": 4.507332873125177e-06, "loss": 7.07, "step": 4620 }, { "epoch": 0.4918032786885246, "grad_norm": 0.23254001140594482, "learning_rate": 4.413560103336585e-06, "loss": 7.0764, "step": 4630 }, { "epoch": 0.4936047558998379, "grad_norm": 0.2346353828907013, "learning_rate": 4.3206955571428765e-06, "loss": 7.0757, "step": 4640 }, { "epoch": 0.4954062331111511, "grad_norm": 0.24727889895439148, "learning_rate": 4.228742530890578e-06, "loss": 7.0797, "step": 4650 }, { "epoch": 0.4972077103224644, "grad_norm": 0.2581925094127655, "learning_rate": 4.137704288570634e-06, "loss": 7.0885, "step": 4660 }, { "epoch": 0.4990091875337777, "grad_norm": 0.2502412497997284, "learning_rate": 4.047584061702549e-06, "loss": 7.0935, "step": 4670 }, { "epoch": 0.500810664745091, "grad_norm": 0.26971665024757385, "learning_rate": 3.958385049219696e-06, "loss": 7.0954, "step": 4680 }, { "epoch": 0.5026121419564042, "grad_norm": 0.2606787383556366, "learning_rate": 3.8701104173557526e-06, "loss": 7.0941, "step": 4690 }, { "epoch": 0.5044136191677175, "grad_norm": 0.2621247470378876, "learning_rate": 3.7827632995323204e-06, "loss": 7.0954, "step": 4700 }, { "epoch": 0.5062150963790308, "grad_norm": 0.2539183795452118, "learning_rate": 3.6963467962477017e-06, "loss": 7.0911, "step": 4710 }, { "epoch": 0.5080165735903441, "grad_norm": 0.25110456347465515, "learning_rate": 3.610863974966838e-06, "loss": 7.0925, "step": 4720 }, { "epoch": 0.5098180508016573, "grad_norm": 0.24569188058376312, "learning_rate": 3.5263178700124165e-06, "loss": 7.093, "step": 4730 }, { "epoch": 0.5116195280129706, "grad_norm": 0.2517807185649872, "learning_rate": 3.4427114824571806e-06, "loss": 7.0872, "step": 4740 }, { "epoch": 0.5134210052242839, "grad_norm": 0.255975604057312, "learning_rate": 3.3600477800174024e-06, "loss": 7.0918, "step": 4750 }, { "epoch": 0.5152224824355972, "grad_norm": 0.2566058337688446, "learning_rate": 3.278329696947512e-06, "loss": 7.094, "step": 4760 }, { "epoch": 0.5170239596469105, "grad_norm": 0.2523749768733978, "learning_rate": 3.1975601339359983e-06, "loss": 7.0929, "step": 4770 }, { "epoch": 0.5188254368582238, "grad_norm": 0.25028109550476074, "learning_rate": 3.1177419580023803e-06, "loss": 7.0897, "step": 4780 }, { "epoch": 0.520626914069537, "grad_norm": 0.24556684494018555, "learning_rate": 3.03887800239548e-06, "loss": 7.0884, "step": 4790 }, { "epoch": 0.5224283912808503, "grad_norm": 0.24519598484039307, "learning_rate": 2.9609710664928436e-06, "loss": 7.0899, "step": 4800 }, { "epoch": 0.5242298684921636, "grad_norm": 0.2480156123638153, "learning_rate": 2.884023915701368e-06, "loss": 7.0842, "step": 4810 }, { "epoch": 0.5260313457034769, "grad_norm": 0.22788718342781067, "learning_rate": 2.808039281359139e-06, "loss": 7.0794, "step": 4820 }, { "epoch": 0.5278328229147902, "grad_norm": 0.24182246625423431, "learning_rate": 2.733019860638496e-06, "loss": 7.0782, "step": 4830 }, { "epoch": 0.5296343001261034, "grad_norm": 0.2469298541545868, "learning_rate": 2.658968316450273e-06, "loss": 7.0829, "step": 4840 }, { "epoch": 0.5314357773374166, "grad_norm": 0.2503180503845215, "learning_rate": 2.585887277349275e-06, "loss": 7.086, "step": 4850 }, { "epoch": 0.5332372545487299, "grad_norm": 0.24686779081821442, "learning_rate": 2.5137793374409877e-06, "loss": 7.0855, "step": 4860 }, { "epoch": 0.5350387317600432, "grad_norm": 0.24528920650482178, "learning_rate": 2.4426470562894864e-06, "loss": 7.0786, "step": 4870 }, { "epoch": 0.5368402089713565, "grad_norm": 0.24219372868537903, "learning_rate": 2.3724929588265875e-06, "loss": 7.0811, "step": 4880 }, { "epoch": 0.5386416861826698, "grad_norm": 0.2410127967596054, "learning_rate": 2.303319535262213e-06, "loss": 7.0771, "step": 4890 }, { "epoch": 0.540443163393983, "grad_norm": 0.23678304255008698, "learning_rate": 2.2351292409960156e-06, "loss": 7.0793, "step": 4900 }, { "epoch": 0.5422446406052963, "grad_norm": 0.23868654668331146, "learning_rate": 2.1679244965301846e-06, "loss": 7.0792, "step": 4910 }, { "epoch": 0.5440461178166096, "grad_norm": 0.23609353601932526, "learning_rate": 2.101707687383583e-06, "loss": 7.0765, "step": 4920 }, { "epoch": 0.5458475950279229, "grad_norm": 0.23896516859531403, "learning_rate": 2.0364811640070204e-06, "loss": 7.0719, "step": 4930 }, { "epoch": 0.5476490722392362, "grad_norm": 0.24208270013332367, "learning_rate": 1.972247241699836e-06, "loss": 7.0769, "step": 4940 }, { "epoch": 0.5494505494505495, "grad_norm": 0.2451712042093277, "learning_rate": 1.909008200527723e-06, "loss": 7.0789, "step": 4950 }, { "epoch": 0.5512520266618627, "grad_norm": 0.24089983105659485, "learning_rate": 1.8467662852418032e-06, "loss": 7.0817, "step": 4960 }, { "epoch": 0.553053503873176, "grad_norm": 0.2595120370388031, "learning_rate": 1.7855237051989005e-06, "loss": 7.0896, "step": 4970 }, { "epoch": 0.5548549810844893, "grad_norm": 0.2539128363132477, "learning_rate": 1.7252826342831806e-06, "loss": 7.0923, "step": 4980 }, { "epoch": 0.5566564582958026, "grad_norm": 0.2587498724460602, "learning_rate": 1.666045210828937e-06, "loss": 7.0917, "step": 4990 }, { "epoch": 0.5584579355071159, "grad_norm": 0.26577994227409363, "learning_rate": 1.6078135375447101e-06, "loss": 7.0969, "step": 5000 }, { "epoch": 0.5602594127184292, "grad_norm": 0.2595325708389282, "learning_rate": 1.5505896814386377e-06, "loss": 7.099, "step": 5010 }, { "epoch": 0.5620608899297423, "grad_norm": 0.26649215817451477, "learning_rate": 1.494375673745111e-06, "loss": 7.1029, "step": 5020 }, { "epoch": 0.5638623671410556, "grad_norm": 0.26752805709838867, "learning_rate": 1.4391735098526205e-06, "loss": 7.1029, "step": 5030 }, { "epoch": 0.5656638443523689, "grad_norm": 0.2581203281879425, "learning_rate": 1.3849851492329945e-06, "loss": 7.0943, "step": 5040 }, { "epoch": 0.5674653215636822, "grad_norm": 0.2541639506816864, "learning_rate": 1.3318125153717842e-06, "loss": 7.0975, "step": 5050 }, { "epoch": 0.5692667987749955, "grad_norm": 0.25562813878059387, "learning_rate": 1.2796574957000273e-06, "loss": 7.0956, "step": 5060 }, { "epoch": 0.5710682759863088, "grad_norm": 0.24820005893707275, "learning_rate": 1.2285219415272264e-06, "loss": 7.0917, "step": 5070 }, { "epoch": 0.572869753197622, "grad_norm": 0.2529694139957428, "learning_rate": 1.1784076679756538e-06, "loss": 7.0928, "step": 5080 }, { "epoch": 0.5746712304089353, "grad_norm": 0.24976292252540588, "learning_rate": 1.129316453915904e-06, "loss": 7.0869, "step": 5090 }, { "epoch": 0.5764727076202486, "grad_norm": 0.25373753905296326, "learning_rate": 1.081250041903773e-06, "loss": 7.0899, "step": 5100 }, { "epoch": 0.5782741848315619, "grad_norm": 0.24782034754753113, "learning_rate": 1.034210138118379e-06, "loss": 7.0894, "step": 5110 }, { "epoch": 0.5800756620428752, "grad_norm": 0.2461533397436142, "learning_rate": 9.881984123016063e-07, "loss": 7.0889, "step": 5120 }, { "epoch": 0.5818771392541884, "grad_norm": 0.24630357325077057, "learning_rate": 9.432164976988511e-07, "loss": 7.0897, "step": 5130 }, { "epoch": 0.5836786164655017, "grad_norm": 0.2412177324295044, "learning_rate": 8.99265991001026e-07, "loss": 7.0863, "step": 5140 }, { "epoch": 0.585480093676815, "grad_norm": 0.2493523508310318, "learning_rate": 8.563484522878995e-07, "loss": 7.0893, "step": 5150 }, { "epoch": 0.5872815708881283, "grad_norm": 0.2552441358566284, "learning_rate": 8.144654049727174e-07, "loss": 7.0853, "step": 5160 }, { "epoch": 0.5890830480994416, "grad_norm": 0.2476198971271515, "learning_rate": 7.736183357481086e-07, "loss": 7.0878, "step": 5170 }, { "epoch": 0.5908845253107549, "grad_norm": 0.2340099811553955, "learning_rate": 7.338086945333377e-07, "loss": 7.0856, "step": 5180 }, { "epoch": 0.592686002522068, "grad_norm": 0.23188483715057373, "learning_rate": 6.950378944228264e-07, "loss": 7.0802, "step": 5190 }, { "epoch": 0.5944874797333813, "grad_norm": 0.238411083817482, "learning_rate": 6.573073116359929e-07, "loss": 7.0797, "step": 5200 }, { "epoch": 0.5962889569446946, "grad_norm": 0.2307722419500351, "learning_rate": 6.206182854683984e-07, "loss": 7.0759, "step": 5210 }, { "epoch": 0.5980904341560079, "grad_norm": 0.23716938495635986, "learning_rate": 5.849721182442313e-07, "loss": 7.0785, "step": 5220 }, { "epoch": 0.5998919113673212, "grad_norm": 0.24141480028629303, "learning_rate": 5.50370075270038e-07, "loss": 7.0808, "step": 5230 }, { "epoch": 0.6016933885786345, "grad_norm": 0.23821581900119781, "learning_rate": 5.168133847898448e-07, "loss": 7.0831, "step": 5240 }, { "epoch": 0.6034948657899477, "grad_norm": 0.2506227493286133, "learning_rate": 4.843032379415402e-07, "loss": 7.0845, "step": 5250 }, { "epoch": 0.605296343001261, "grad_norm": 0.24686171114444733, "learning_rate": 4.5284078871460466e-07, "loss": 7.0868, "step": 5260 }, { "epoch": 0.6070978202125743, "grad_norm": 0.242891326546669, "learning_rate": 4.224271539091407e-07, "loss": 7.0893, "step": 5270 }, { "epoch": 0.6088992974238876, "grad_norm": 0.24332739412784576, "learning_rate": 3.930634130962374e-07, "loss": 7.0905, "step": 5280 }, { "epoch": 0.6107007746352009, "grad_norm": 0.2453061193227768, "learning_rate": 3.647506085796415e-07, "loss": 7.0909, "step": 5290 }, { "epoch": 0.6125022518465142, "grad_norm": 0.24784937500953674, "learning_rate": 3.374897453587733e-07, "loss": 7.0919, "step": 5300 }, { "epoch": 0.6143037290578274, "grad_norm": 0.24672792851924896, "learning_rate": 3.112817910930288e-07, "loss": 7.0904, "step": 5310 }, { "epoch": 0.6161052062691407, "grad_norm": 0.2514455020427704, "learning_rate": 2.861276760674636e-07, "loss": 7.0932, "step": 5320 }, { "epoch": 0.617906683480454, "grad_norm": 0.23828017711639404, "learning_rate": 2.620282931597362e-07, "loss": 7.087, "step": 5330 }, { "epoch": 0.6197081606917673, "grad_norm": 0.2430391162633896, "learning_rate": 2.3898449780844657e-07, "loss": 7.0835, "step": 5340 }, { "epoch": 0.6215096379030806, "grad_norm": 0.25155386328697205, "learning_rate": 2.169971079827504e-07, "loss": 7.0836, "step": 5350 }, { "epoch": 0.6233111151143939, "grad_norm": 0.24421241879463196, "learning_rate": 1.9606690415332916e-07, "loss": 7.081, "step": 5360 }, { "epoch": 0.625112592325707, "grad_norm": 0.23836137354373932, "learning_rate": 1.761946292646921e-07, "loss": 7.0819, "step": 5370 }, { "epoch": 0.6269140695370203, "grad_norm": 0.24216735363006592, "learning_rate": 1.5738098870879736e-07, "loss": 7.0809, "step": 5380 }, { "epoch": 0.6287155467483336, "grad_norm": 0.2466571182012558, "learning_rate": 1.3962665030000876e-07, "loss": 7.0819, "step": 5390 }, { "epoch": 0.6305170239596469, "grad_norm": 0.24312724173069, "learning_rate": 1.229322442514147e-07, "loss": 7.0801, "step": 5400 }, { "epoch": 0.6323185011709602, "grad_norm": 0.2460329681634903, "learning_rate": 1.0729836315242603e-07, "loss": 7.0827, "step": 5410 }, { "epoch": 0.6341199783822735, "grad_norm": 0.24453233182430267, "learning_rate": 9.272556194776294e-08, "loss": 7.0787, "step": 5420 }, { "epoch": 0.6359214555935867, "grad_norm": 0.24370291829109192, "learning_rate": 7.921435791774401e-08, "loss": 7.081, "step": 5430 }, { "epoch": 0.6377229328049, "grad_norm": 0.2505902945995331, "learning_rate": 6.67652306599409e-08, "loss": 7.0818, "step": 5440 }, { "epoch": 0.6395244100162133, "grad_norm": 0.23744668066501617, "learning_rate": 5.537862207213862e-08, "loss": 7.0838, "step": 5450 }, { "epoch": 0.6413258872275266, "grad_norm": 0.2528400123119354, "learning_rate": 4.505493633665147e-08, "loss": 7.0809, "step": 5460 }, { "epoch": 0.6431273644388399, "grad_norm": 0.24644063413143158, "learning_rate": 3.579453990598447e-08, "loss": 7.0816, "step": 5470 }, { "epoch": 0.6449288416501531, "grad_norm": 0.253196120262146, "learning_rate": 2.7597761489820406e-08, "loss": 7.0856, "step": 5480 }, { "epoch": 0.6467303188614664, "grad_norm": 0.25545504689216614, "learning_rate": 2.0464892043352556e-08, "loss": 7.0849, "step": 5490 }, { "epoch": 0.6485317960727797, "grad_norm": 0.2502111494541168, "learning_rate": 1.4396184756956254e-08, "loss": 7.0825, "step": 5500 }, { "epoch": 0.650333273284093, "grad_norm": 0.24788102507591248, "learning_rate": 9.391855047206077e-09, "loss": 7.0864, "step": 5510 }, { "epoch": 0.6521347504954063, "grad_norm": 0.24131740629673004, "learning_rate": 5.452080549215311e-09, "loss": 7.0852, "step": 5520 }, { "epoch": 0.6539362277067196, "grad_norm": 0.25021007657051086, "learning_rate": 2.5770011103543045e-09, "loss": 7.0886, "step": 5530 }, { "epoch": 0.6557377049180327, "grad_norm": 0.2536139190196991, "learning_rate": 7.667187852578028e-10, "loss": 7.092, "step": 5540 }, { "epoch": 0.657539182129346, "grad_norm": 0.2516995668411255, "learning_rate": 2.129783223447923e-11, "loss": 7.0894, "step": 5550 }, { "epoch": 0.0002269861286254729, "grad_norm": 0.19990862905979156, "learning_rate": 5.867552988165645e-05, "loss": 7.0869, "step": 5560 }, { "epoch": 0.00047919293820933165, "grad_norm": 0.19071044027805328, "learning_rate": 5.866816735118534e-05, "loss": 7.0379, "step": 5570 }, { "epoch": 0.0007313997477931904, "grad_norm": 0.21833942830562592, "learning_rate": 5.8660784878306686e-05, "loss": 7.0406, "step": 5580 }, { "epoch": 0.0009836065573770492, "grad_norm": 0.2672317326068878, "learning_rate": 5.865338246815596e-05, "loss": 7.0956, "step": 5590 }, { "epoch": 0.001235813366960908, "grad_norm": 0.23912940919399261, "learning_rate": 5.8645960125882484e-05, "loss": 7.0829, "step": 5600 }, { "epoch": 0.0014880201765447667, "grad_norm": 0.24160908162593842, "learning_rate": 5.8638517856649466e-05, "loss": 7.0885, "step": 5610 }, { "epoch": 0.0017402269861286255, "grad_norm": 0.23559272289276123, "learning_rate": 5.8631055665633956e-05, "loss": 7.0837, "step": 5620 }, { "epoch": 0.0019924337957124842, "grad_norm": 0.20176731050014496, "learning_rate": 5.8623573558026876e-05, "loss": 7.0735, "step": 5630 }, { "epoch": 0.002244640605296343, "grad_norm": 0.21517911553382874, "learning_rate": 5.8616071539033e-05, "loss": 7.0665, "step": 5640 }, { "epoch": 0.0024968474148802018, "grad_norm": 0.2154095470905304, "learning_rate": 5.860854961387093e-05, "loss": 7.0699, "step": 5650 }, { "epoch": 0.0027490542244640605, "grad_norm": 0.17505362629890442, "learning_rate": 5.860100778777316e-05, "loss": 7.0556, "step": 5660 }, { "epoch": 0.0030012610340479193, "grad_norm": 0.25252488255500793, "learning_rate": 5.8593446065985994e-05, "loss": 7.0728, "step": 5670 }, { "epoch": 0.003253467843631778, "grad_norm": 0.2561737298965454, "learning_rate": 5.858586445376957e-05, "loss": 7.0979, "step": 5680 }, { "epoch": 0.003505674653215637, "grad_norm": 0.2536204159259796, "learning_rate": 5.8578262956397895e-05, "loss": 7.0969, "step": 5690 }, { "epoch": 0.0037578814627994956, "grad_norm": 0.25124192237854004, "learning_rate": 5.857064157915879e-05, "loss": 7.0954, "step": 5700 }, { "epoch": 0.004010088272383355, "grad_norm": 0.24180065095424652, "learning_rate": 5.856300032735389e-05, "loss": 7.0759, "step": 5710 }, { "epoch": 0.0042622950819672135, "grad_norm": 0.22192548215389252, "learning_rate": 5.855533920629868e-05, "loss": 7.0741, "step": 5720 }, { "epoch": 0.004514501891551072, "grad_norm": 0.20840312540531158, "learning_rate": 5.8547658221322466e-05, "loss": 7.0687, "step": 5730 }, { "epoch": 0.004766708701134931, "grad_norm": 0.21551012992858887, "learning_rate": 5.853995737776837e-05, "loss": 7.0734, "step": 5740 }, { "epoch": 0.00501891551071879, "grad_norm": 0.2080470472574234, "learning_rate": 5.853223668099329e-05, "loss": 7.0797, "step": 5750 }, { "epoch": 0.005271122320302649, "grad_norm": 0.2597832977771759, "learning_rate": 5.8524496136367996e-05, "loss": 7.0981, "step": 5760 }, { "epoch": 0.005523329129886507, "grad_norm": 0.2497403919696808, "learning_rate": 5.8516735749277014e-05, "loss": 7.0988, "step": 5770 }, { "epoch": 0.005775535939470366, "grad_norm": 0.2296038568019867, "learning_rate": 5.850895552511871e-05, "loss": 7.0855, "step": 5780 }, { "epoch": 0.006027742749054225, "grad_norm": 0.25063857436180115, "learning_rate": 5.850115546930523e-05, "loss": 7.0757, "step": 5790 }, { "epoch": 0.006279949558638084, "grad_norm": 0.25278040766716003, "learning_rate": 5.849333558726251e-05, "loss": 7.0704, "step": 5800 }, { "epoch": 0.006532156368221942, "grad_norm": 0.2536276578903198, "learning_rate": 5.848549588443029e-05, "loss": 7.0879, "step": 5810 }, { "epoch": 0.006784363177805801, "grad_norm": 0.24107541143894196, "learning_rate": 5.847763636626211e-05, "loss": 7.0794, "step": 5820 }, { "epoch": 0.00703656998738966, "grad_norm": 0.2464154064655304, "learning_rate": 5.846975703822526e-05, "loss": 7.0885, "step": 5830 }, { "epoch": 0.007288776796973519, "grad_norm": 0.25456055998802185, "learning_rate": 5.846185790580083e-05, "loss": 7.0767, "step": 5840 }, { "epoch": 0.0075409836065573775, "grad_norm": 0.23196366429328918, "learning_rate": 5.8453938974483694e-05, "loss": 7.0894, "step": 5850 }, { "epoch": 0.007793190416141236, "grad_norm": 0.23861180245876312, "learning_rate": 5.844600024978248e-05, "loss": 7.0828, "step": 5860 }, { "epoch": 0.008045397225725094, "grad_norm": 0.24323299527168274, "learning_rate": 5.84380417372196e-05, "loss": 7.0808, "step": 5870 }, { "epoch": 0.008297604035308954, "grad_norm": 0.18533693253993988, "learning_rate": 5.843006344233123e-05, "loss": 7.0523, "step": 5880 }, { "epoch": 0.008549810844892812, "grad_norm": 0.21809923648834229, "learning_rate": 5.842206537066729e-05, "loss": 7.0648, "step": 5890 }, { "epoch": 0.008802017654476671, "grad_norm": 0.25461652874946594, "learning_rate": 5.84140475277915e-05, "loss": 7.0812, "step": 5900 }, { "epoch": 0.00905422446406053, "grad_norm": 0.25935718417167664, "learning_rate": 5.840600991928127e-05, "loss": 7.0977, "step": 5910 }, { "epoch": 0.009306431273644389, "grad_norm": 0.2506691515445709, "learning_rate": 5.8397952550727806e-05, "loss": 7.0925, "step": 5920 }, { "epoch": 0.009558638083228247, "grad_norm": 0.24012018740177155, "learning_rate": 5.8389875427736046e-05, "loss": 7.0897, "step": 5930 }, { "epoch": 0.009810844892812106, "grad_norm": 0.22104308009147644, "learning_rate": 5.8381778555924675e-05, "loss": 7.0846, "step": 5940 }, { "epoch": 0.010063051702395964, "grad_norm": 0.2395235151052475, "learning_rate": 5.8373661940926116e-05, "loss": 7.0786, "step": 5950 }, { "epoch": 0.010315258511979824, "grad_norm": 0.19813287258148193, "learning_rate": 5.836552558838651e-05, "loss": 7.0652, "step": 5960 }, { "epoch": 0.010567465321563682, "grad_norm": 0.25186845660209656, "learning_rate": 5.835736950396573e-05, "loss": 7.081, "step": 5970 }, { "epoch": 0.010819672131147541, "grad_norm": 0.18955203890800476, "learning_rate": 5.834919369333741e-05, "loss": 7.0545, "step": 5980 }, { "epoch": 0.0110718789407314, "grad_norm": 0.1905108094215393, "learning_rate": 5.8340998162188877e-05, "loss": 7.0332, "step": 5990 }, { "epoch": 0.011324085750315259, "grad_norm": 0.19776774942874908, "learning_rate": 5.833278291622116e-05, "loss": 7.0297, "step": 6000 }, { "epoch": 0.011576292559899117, "grad_norm": 0.19011956453323364, "learning_rate": 5.8324547961149036e-05, "loss": 7.0335, "step": 6010 }, { "epoch": 0.011828499369482976, "grad_norm": 0.2376624494791031, "learning_rate": 5.831629330270098e-05, "loss": 7.0495, "step": 6020 }, { "epoch": 0.012080706179066834, "grad_norm": 0.2361890971660614, "learning_rate": 5.830801894661916e-05, "loss": 7.0878, "step": 6030 }, { "epoch": 0.012332912988650694, "grad_norm": 0.2461146116256714, "learning_rate": 5.829972489865947e-05, "loss": 7.0925, "step": 6040 }, { "epoch": 0.012585119798234552, "grad_norm": 0.22894535958766937, "learning_rate": 5.8291411164591484e-05, "loss": 7.0836, "step": 6050 }, { "epoch": 0.012837326607818412, "grad_norm": 0.21714922785758972, "learning_rate": 5.8283077750198475e-05, "loss": 7.0655, "step": 6060 }, { "epoch": 0.01308953341740227, "grad_norm": 0.22002795338630676, "learning_rate": 5.8274724661277414e-05, "loss": 7.0632, "step": 6070 }, { "epoch": 0.013341740226986129, "grad_norm": 0.2430749237537384, "learning_rate": 5.826635190363895e-05, "loss": 7.079, "step": 6080 }, { "epoch": 0.013593947036569987, "grad_norm": 0.22506120800971985, "learning_rate": 5.825795948310741e-05, "loss": 7.0877, "step": 6090 }, { "epoch": 0.013846153846153847, "grad_norm": 0.2421126365661621, "learning_rate": 5.8249547405520805e-05, "loss": 7.077, "step": 6100 }, { "epoch": 0.014098360655737704, "grad_norm": 0.22421644628047943, "learning_rate": 5.824111567673082e-05, "loss": 7.079, "step": 6110 }, { "epoch": 0.014350567465321564, "grad_norm": 0.2314075082540512, "learning_rate": 5.8232664302602816e-05, "loss": 7.0778, "step": 6120 }, { "epoch": 0.014602774274905422, "grad_norm": 0.23487615585327148, "learning_rate": 5.822419328901582e-05, "loss": 7.0718, "step": 6130 }, { "epoch": 0.014854981084489282, "grad_norm": 0.23548173904418945, "learning_rate": 5.821570264186249e-05, "loss": 7.0881, "step": 6140 }, { "epoch": 0.01510718789407314, "grad_norm": 0.24325810372829437, "learning_rate": 5.820719236704919e-05, "loss": 7.086, "step": 6150 }, { "epoch": 0.015359394703657, "grad_norm": 0.22311294078826904, "learning_rate": 5.81986624704959e-05, "loss": 7.0807, "step": 6160 }, { "epoch": 0.015611601513240857, "grad_norm": 0.22908805310726166, "learning_rate": 5.819011295813626e-05, "loss": 7.0775, "step": 6170 }, { "epoch": 0.015863808322824715, "grad_norm": 0.24734221398830414, "learning_rate": 5.818154383591758e-05, "loss": 7.0794, "step": 6180 }, { "epoch": 0.016116015132408575, "grad_norm": 0.24485847353935242, "learning_rate": 5.817295510980077e-05, "loss": 7.0902, "step": 6190 }, { "epoch": 0.016368221941992434, "grad_norm": 0.21798300743103027, "learning_rate": 5.816434678576041e-05, "loss": 7.0843, "step": 6200 }, { "epoch": 0.016620428751576294, "grad_norm": 0.2364831119775772, "learning_rate": 5.815571886978467e-05, "loss": 7.0602, "step": 6210 }, { "epoch": 0.01687263556116015, "grad_norm": 0.22057068347930908, "learning_rate": 5.814707136787541e-05, "loss": 7.063, "step": 6220 }, { "epoch": 0.01712484237074401, "grad_norm": 0.25027430057525635, "learning_rate": 5.813840428604808e-05, "loss": 7.072, "step": 6230 }, { "epoch": 0.01737704918032787, "grad_norm": 0.23681822419166565, "learning_rate": 5.812971763033174e-05, "loss": 7.0866, "step": 6240 }, { "epoch": 0.01762925598991173, "grad_norm": 0.1914299577474594, "learning_rate": 5.812101140676909e-05, "loss": 7.0498, "step": 6250 }, { "epoch": 0.017881462799495585, "grad_norm": 0.21080397069454193, "learning_rate": 5.811228562141642e-05, "loss": 7.0723, "step": 6260 }, { "epoch": 0.018133669609079445, "grad_norm": 0.2181672900915146, "learning_rate": 5.8103540280343646e-05, "loss": 7.0624, "step": 6270 }, { "epoch": 0.018385876418663304, "grad_norm": 0.24125191569328308, "learning_rate": 5.8094775389634285e-05, "loss": 7.069, "step": 6280 }, { "epoch": 0.018638083228247164, "grad_norm": 0.24391724169254303, "learning_rate": 5.8085990955385434e-05, "loss": 7.0899, "step": 6290 }, { "epoch": 0.01889029003783102, "grad_norm": 0.22673499584197998, "learning_rate": 5.807718698370782e-05, "loss": 7.0823, "step": 6300 }, { "epoch": 0.01914249684741488, "grad_norm": 0.22614119946956635, "learning_rate": 5.806836348072572e-05, "loss": 7.0715, "step": 6310 }, { "epoch": 0.01939470365699874, "grad_norm": 0.2293558418750763, "learning_rate": 5.805952045257704e-05, "loss": 7.0807, "step": 6320 }, { "epoch": 0.0196469104665826, "grad_norm": 0.22892390191555023, "learning_rate": 5.805065790541323e-05, "loss": 7.0733, "step": 6330 }, { "epoch": 0.019899117276166455, "grad_norm": 0.1688012033700943, "learning_rate": 5.8041775845399335e-05, "loss": 7.073, "step": 6340 }, { "epoch": 0.020151324085750315, "grad_norm": 0.1622321456670761, "learning_rate": 5.803287427871398e-05, "loss": 7.0165, "step": 6350 }, { "epoch": 0.020403530895334174, "grad_norm": 0.1625095009803772, "learning_rate": 5.802395321154936e-05, "loss": 7.0234, "step": 6360 }, { "epoch": 0.020655737704918034, "grad_norm": 0.18573230504989624, "learning_rate": 5.80150126501112e-05, "loss": 7.0257, "step": 6370 }, { "epoch": 0.02090794451450189, "grad_norm": 0.18333572149276733, "learning_rate": 5.800605260061885e-05, "loss": 7.0308, "step": 6380 }, { "epoch": 0.02116015132408575, "grad_norm": 0.22298277914524078, "learning_rate": 5.799707306930516e-05, "loss": 7.0646, "step": 6390 }, { "epoch": 0.02141235813366961, "grad_norm": 0.22645047307014465, "learning_rate": 5.798807406241653e-05, "loss": 7.0816, "step": 6400 }, { "epoch": 0.02166456494325347, "grad_norm": 0.23614764213562012, "learning_rate": 5.7979055586212975e-05, "loss": 7.079, "step": 6410 }, { "epoch": 0.021916771752837325, "grad_norm": 0.22329796850681305, "learning_rate": 5.797001764696798e-05, "loss": 7.0792, "step": 6420 }, { "epoch": 0.022168978562421185, "grad_norm": 0.23304259777069092, "learning_rate": 5.7960960250968605e-05, "loss": 7.0797, "step": 6430 }, { "epoch": 0.022421185372005045, "grad_norm": 0.231819748878479, "learning_rate": 5.795188340451544e-05, "loss": 7.0759, "step": 6440 }, { "epoch": 0.022673392181588904, "grad_norm": 0.24274131655693054, "learning_rate": 5.794278711392259e-05, "loss": 7.0652, "step": 6450 }, { "epoch": 0.02292559899117276, "grad_norm": 0.23719048500061035, "learning_rate": 5.793367138551772e-05, "loss": 7.0817, "step": 6460 }, { "epoch": 0.02317780580075662, "grad_norm": 0.22217978537082672, "learning_rate": 5.792453622564197e-05, "loss": 7.0858, "step": 6470 }, { "epoch": 0.02343001261034048, "grad_norm": 0.23536349833011627, "learning_rate": 5.7915381640650035e-05, "loss": 7.0832, "step": 6480 }, { "epoch": 0.02368221941992434, "grad_norm": 0.22834347188472748, "learning_rate": 5.7906207636910105e-05, "loss": 7.0847, "step": 6490 }, { "epoch": 0.023934426229508195, "grad_norm": 0.20580360293388367, "learning_rate": 5.789701422080389e-05, "loss": 7.0701, "step": 6500 }, { "epoch": 0.024186633039092055, "grad_norm": 0.22056245803833008, "learning_rate": 5.7887801398726585e-05, "loss": 7.0636, "step": 6510 }, { "epoch": 0.024438839848675915, "grad_norm": 0.21607635915279388, "learning_rate": 5.787856917708691e-05, "loss": 7.0613, "step": 6520 }, { "epoch": 0.024691046658259774, "grad_norm": 0.23155520856380463, "learning_rate": 5.7869317562307055e-05, "loss": 7.0773, "step": 6530 }, { "epoch": 0.02494325346784363, "grad_norm": 0.20843277871608734, "learning_rate": 5.7860046560822714e-05, "loss": 7.0824, "step": 6540 }, { "epoch": 0.02519546027742749, "grad_norm": 0.1864456981420517, "learning_rate": 5.7850756179083065e-05, "loss": 7.0563, "step": 6550 }, { "epoch": 0.02544766708701135, "grad_norm": 0.19915568828582764, "learning_rate": 5.784144642355076e-05, "loss": 7.0679, "step": 6560 }, { "epoch": 0.02569987389659521, "grad_norm": 0.1775684356689453, "learning_rate": 5.783211730070195e-05, "loss": 7.059, "step": 6570 }, { "epoch": 0.025952080706179066, "grad_norm": 0.19035448133945465, "learning_rate": 5.782276881702623e-05, "loss": 7.0617, "step": 6580 }, { "epoch": 0.026204287515762925, "grad_norm": 0.21015025675296783, "learning_rate": 5.7813400979026685e-05, "loss": 7.0649, "step": 6590 }, { "epoch": 0.026456494325346785, "grad_norm": 0.22353969514369965, "learning_rate": 5.780401379321984e-05, "loss": 7.0708, "step": 6600 }, { "epoch": 0.026708701134930644, "grad_norm": 0.21002615988254547, "learning_rate": 5.77946072661357e-05, "loss": 7.0748, "step": 6610 }, { "epoch": 0.0269609079445145, "grad_norm": 0.2259298712015152, "learning_rate": 5.778518140431773e-05, "loss": 7.0665, "step": 6620 }, { "epoch": 0.02721311475409836, "grad_norm": 0.23019148409366608, "learning_rate": 5.7775736214322824e-05, "loss": 7.077, "step": 6630 }, { "epoch": 0.02746532156368222, "grad_norm": 0.2269354909658432, "learning_rate": 5.7766271702721325e-05, "loss": 7.0805, "step": 6640 }, { "epoch": 0.02771752837326608, "grad_norm": 0.2090812772512436, "learning_rate": 5.7756787876097026e-05, "loss": 7.0684, "step": 6650 }, { "epoch": 0.027969735182849936, "grad_norm": 0.17719708383083344, "learning_rate": 5.774728474104714e-05, "loss": 7.0606, "step": 6660 }, { "epoch": 0.028221941992433795, "grad_norm": 0.2342524230480194, "learning_rate": 5.773776230418235e-05, "loss": 7.0616, "step": 6670 }, { "epoch": 0.028474148802017655, "grad_norm": 0.23644807934761047, "learning_rate": 5.7728220572126705e-05, "loss": 7.0862, "step": 6680 }, { "epoch": 0.028726355611601515, "grad_norm": 0.23556506633758545, "learning_rate": 5.771865955151775e-05, "loss": 7.0782, "step": 6690 }, { "epoch": 0.02897856242118537, "grad_norm": 0.20833085477352142, "learning_rate": 5.7709079249006364e-05, "loss": 7.0775, "step": 6700 }, { "epoch": 0.02923076923076923, "grad_norm": 0.20913469791412354, "learning_rate": 5.7699479671256915e-05, "loss": 7.0678, "step": 6710 }, { "epoch": 0.02948297604035309, "grad_norm": 0.2128690630197525, "learning_rate": 5.7689860824947134e-05, "loss": 7.0727, "step": 6720 }, { "epoch": 0.02973518284993695, "grad_norm": 0.2314910739660263, "learning_rate": 5.7680222716768174e-05, "loss": 7.079, "step": 6730 }, { "epoch": 0.029987389659520806, "grad_norm": 0.20648358762264252, "learning_rate": 5.767056535342457e-05, "loss": 7.0739, "step": 6740 }, { "epoch": 0.030239596469104665, "grad_norm": 0.21808314323425293, "learning_rate": 5.766088874163429e-05, "loss": 7.0605, "step": 6750 }, { "epoch": 0.030491803278688525, "grad_norm": 0.19629353284835815, "learning_rate": 5.7651192888128634e-05, "loss": 7.0577, "step": 6760 }, { "epoch": 0.030744010088272385, "grad_norm": 0.20330224931240082, "learning_rate": 5.764147779965234e-05, "loss": 7.0509, "step": 6770 }, { "epoch": 0.03099621689785624, "grad_norm": 0.21518731117248535, "learning_rate": 5.76317434829635e-05, "loss": 7.0576, "step": 6780 }, { "epoch": 0.0312484237074401, "grad_norm": 0.23389771580696106, "learning_rate": 5.7621989944833575e-05, "loss": 7.0752, "step": 6790 }, { "epoch": 0.03150063051702396, "grad_norm": 0.23257946968078613, "learning_rate": 5.761221719204743e-05, "loss": 7.0823, "step": 6800 }, { "epoch": 0.031752837326607816, "grad_norm": 0.2178701013326645, "learning_rate": 5.760242523140326e-05, "loss": 7.0775, "step": 6810 }, { "epoch": 0.03200504413619168, "grad_norm": 0.20068123936653137, "learning_rate": 5.759261406971264e-05, "loss": 7.0699, "step": 6820 }, { "epoch": 0.032257250945775535, "grad_norm": 0.20975187420845032, "learning_rate": 5.75827837138005e-05, "loss": 7.0671, "step": 6830 }, { "epoch": 0.03250945775535939, "grad_norm": 0.22422073781490326, "learning_rate": 5.7572934170505115e-05, "loss": 7.0677, "step": 6840 }, { "epoch": 0.032761664564943255, "grad_norm": 0.17535652220249176, "learning_rate": 5.756306544667811e-05, "loss": 7.0416, "step": 6850 }, { "epoch": 0.03301387137452711, "grad_norm": 0.16785463690757751, "learning_rate": 5.755317754918447e-05, "loss": 7.0206, "step": 6860 }, { "epoch": 0.033266078184110974, "grad_norm": 0.17005743086338043, "learning_rate": 5.7543270484902496e-05, "loss": 7.0193, "step": 6870 }, { "epoch": 0.03351828499369483, "grad_norm": 0.15321427583694458, "learning_rate": 5.753334426072382e-05, "loss": 7.0245, "step": 6880 }, { "epoch": 0.033770491803278686, "grad_norm": 0.23841974139213562, "learning_rate": 5.752339888355342e-05, "loss": 7.0334, "step": 6890 }, { "epoch": 0.03402269861286255, "grad_norm": 0.23893627524375916, "learning_rate": 5.751343436030959e-05, "loss": 7.0795, "step": 6900 }, { "epoch": 0.034274905422446406, "grad_norm": 0.22600780427455902, "learning_rate": 5.750345069792393e-05, "loss": 7.0807, "step": 6910 }, { "epoch": 0.03452711223203026, "grad_norm": 0.23098847270011902, "learning_rate": 5.749344790334138e-05, "loss": 7.0817, "step": 6920 }, { "epoch": 0.034779319041614125, "grad_norm": 0.2163378894329071, "learning_rate": 5.7483425983520165e-05, "loss": 7.0805, "step": 6930 }, { "epoch": 0.03503152585119798, "grad_norm": 0.21547213196754456, "learning_rate": 5.747338494543182e-05, "loss": 7.0597, "step": 6940 }, { "epoch": 0.035283732660781844, "grad_norm": 0.1921781450510025, "learning_rate": 5.746332479606119e-05, "loss": 7.0582, "step": 6950 }, { "epoch": 0.0355359394703657, "grad_norm": 0.24126137793064117, "learning_rate": 5.745324554240639e-05, "loss": 7.0621, "step": 6960 }, { "epoch": 0.035788146279949556, "grad_norm": 0.23615024983882904, "learning_rate": 5.744314719147885e-05, "loss": 7.0854, "step": 6970 }, { "epoch": 0.03604035308953342, "grad_norm": 0.18157872557640076, "learning_rate": 5.74330297503033e-05, "loss": 7.0426, "step": 6980 }, { "epoch": 0.036292559899117276, "grad_norm": 0.1624717116355896, "learning_rate": 5.742289322591769e-05, "loss": 7.0495, "step": 6990 }, { "epoch": 0.03654476670870113, "grad_norm": 0.18465650081634521, "learning_rate": 5.741273762537329e-05, "loss": 7.0445, "step": 7000 }, { "epoch": 0.036796973518284995, "grad_norm": 0.2118595838546753, "learning_rate": 5.740256295573464e-05, "loss": 7.0601, "step": 7010 }, { "epoch": 0.03704918032786885, "grad_norm": 0.20609340071678162, "learning_rate": 5.739236922407952e-05, "loss": 7.0658, "step": 7020 }, { "epoch": 0.037301387137452714, "grad_norm": 0.21710552275180817, "learning_rate": 5.738215643749899e-05, "loss": 7.0717, "step": 7030 }, { "epoch": 0.03755359394703657, "grad_norm": 0.22812005877494812, "learning_rate": 5.737192460309737e-05, "loss": 7.0737, "step": 7040 }, { "epoch": 0.03780580075662043, "grad_norm": 0.21236513555049896, "learning_rate": 5.7361673727992214e-05, "loss": 7.0755, "step": 7050 }, { "epoch": 0.03805800756620429, "grad_norm": 0.2259535789489746, "learning_rate": 5.735140381931432e-05, "loss": 7.0724, "step": 7060 }, { "epoch": 0.038310214375788146, "grad_norm": 0.22349607944488525, "learning_rate": 5.734111488420774e-05, "loss": 7.0758, "step": 7070 }, { "epoch": 0.038562421185372, "grad_norm": 0.21958306431770325, "learning_rate": 5.7330806929829774e-05, "loss": 7.0724, "step": 7080 }, { "epoch": 0.038814627994955865, "grad_norm": 0.20679834485054016, "learning_rate": 5.73204799633509e-05, "loss": 7.0727, "step": 7090 }, { "epoch": 0.03906683480453972, "grad_norm": 0.21523764729499817, "learning_rate": 5.731013399195487e-05, "loss": 7.0643, "step": 7100 }, { "epoch": 0.039319041614123584, "grad_norm": 0.1858968883752823, "learning_rate": 5.7299769022838636e-05, "loss": 7.0614, "step": 7110 }, { "epoch": 0.03957124842370744, "grad_norm": 0.18624641001224518, "learning_rate": 5.728938506321239e-05, "loss": 7.0626, "step": 7120 }, { "epoch": 0.0398234552332913, "grad_norm": 0.18578192591667175, "learning_rate": 5.72789821202995e-05, "loss": 7.0609, "step": 7130 }, { "epoch": 0.04007566204287516, "grad_norm": 0.18121884763240814, "learning_rate": 5.726856020133654e-05, "loss": 7.0239, "step": 7140 }, { "epoch": 0.040327868852459016, "grad_norm": 0.17078937590122223, "learning_rate": 5.725811931357333e-05, "loss": 7.03, "step": 7150 }, { "epoch": 0.04058007566204287, "grad_norm": 0.15586864948272705, "learning_rate": 5.724765946427283e-05, "loss": 7.024, "step": 7160 }, { "epoch": 0.040832282471626735, "grad_norm": 0.14800262451171875, "learning_rate": 5.723718066071124e-05, "loss": 7.0213, "step": 7170 }, { "epoch": 0.04108448928121059, "grad_norm": 0.21201112866401672, "learning_rate": 5.722668291017789e-05, "loss": 7.0624, "step": 7180 }, { "epoch": 0.041336696090794454, "grad_norm": 0.22195960581302643, "learning_rate": 5.721616621997533e-05, "loss": 7.0763, "step": 7190 }, { "epoch": 0.04158890290037831, "grad_norm": 0.2239072024822235, "learning_rate": 5.720563059741927e-05, "loss": 7.0758, "step": 7200 }, { "epoch": 0.04184110970996217, "grad_norm": 0.21877072751522064, "learning_rate": 5.719507604983861e-05, "loss": 7.0629, "step": 7210 }, { "epoch": 0.04209331651954603, "grad_norm": 0.22755898535251617, "learning_rate": 5.7184502584575384e-05, "loss": 7.0775, "step": 7220 }, { "epoch": 0.042345523329129886, "grad_norm": 0.19540785253047943, "learning_rate": 5.717391020898481e-05, "loss": 7.0644, "step": 7230 }, { "epoch": 0.04259773013871374, "grad_norm": 0.20183035731315613, "learning_rate": 5.7163298930435237e-05, "loss": 7.0662, "step": 7240 }, { "epoch": 0.042849936948297605, "grad_norm": 0.23004983365535736, "learning_rate": 5.7152668756308195e-05, "loss": 7.0721, "step": 7250 }, { "epoch": 0.04310214375788146, "grad_norm": 0.22353877127170563, "learning_rate": 5.7142019693998334e-05, "loss": 7.08, "step": 7260 }, { "epoch": 0.043354350567465325, "grad_norm": 0.1993200182914734, "learning_rate": 5.713135175091345e-05, "loss": 7.0624, "step": 7270 }, { "epoch": 0.04360655737704918, "grad_norm": 0.18610408902168274, "learning_rate": 5.712066493447447e-05, "loss": 7.0638, "step": 7280 }, { "epoch": 0.04385876418663304, "grad_norm": 0.19349895417690277, "learning_rate": 5.7109959252115455e-05, "loss": 7.0422, "step": 7290 }, { "epoch": 0.0441109709962169, "grad_norm": 0.17549240589141846, "learning_rate": 5.709923471128359e-05, "loss": 7.0446, "step": 7300 }, { "epoch": 0.044363177805800756, "grad_norm": 0.1589989811182022, "learning_rate": 5.708849131943917e-05, "loss": 7.0163, "step": 7310 }, { "epoch": 0.04461538461538461, "grad_norm": 0.16704930365085602, "learning_rate": 5.707772908405562e-05, "loss": 7.042, "step": 7320 }, { "epoch": 0.044867591424968475, "grad_norm": 0.15386879444122314, "learning_rate": 5.706694801261946e-05, "loss": 7.0419, "step": 7330 }, { "epoch": 0.04511979823455233, "grad_norm": 0.22437438368797302, "learning_rate": 5.705614811263031e-05, "loss": 7.0509, "step": 7340 }, { "epoch": 0.045372005044136195, "grad_norm": 0.2204255759716034, "learning_rate": 5.704532939160089e-05, "loss": 7.0739, "step": 7350 }, { "epoch": 0.04562421185372005, "grad_norm": 0.21893218159675598, "learning_rate": 5.703449185705703e-05, "loss": 7.0782, "step": 7360 }, { "epoch": 0.04587641866330391, "grad_norm": 0.20865917205810547, "learning_rate": 5.702363551653762e-05, "loss": 7.0684, "step": 7370 }, { "epoch": 0.04612862547288777, "grad_norm": 0.21959102153778076, "learning_rate": 5.7012760377594645e-05, "loss": 7.0688, "step": 7380 }, { "epoch": 0.046380832282471626, "grad_norm": 0.18961955606937408, "learning_rate": 5.700186644779318e-05, "loss": 7.0565, "step": 7390 }, { "epoch": 0.04663303909205548, "grad_norm": 0.19530710577964783, "learning_rate": 5.699095373471134e-05, "loss": 7.0528, "step": 7400 }, { "epoch": 0.046885245901639346, "grad_norm": 0.2055942714214325, "learning_rate": 5.6980022245940346e-05, "loss": 7.0604, "step": 7410 }, { "epoch": 0.0471374527112232, "grad_norm": 0.21800529956817627, "learning_rate": 5.6969071989084435e-05, "loss": 7.0722, "step": 7420 }, { "epoch": 0.047389659520807065, "grad_norm": 0.21817395091056824, "learning_rate": 5.695810297176093e-05, "loss": 7.0734, "step": 7430 }, { "epoch": 0.04764186633039092, "grad_norm": 0.2039843499660492, "learning_rate": 5.69471152016002e-05, "loss": 7.0724, "step": 7440 }, { "epoch": 0.04789407313997478, "grad_norm": 0.1925882250070572, "learning_rate": 5.693610868624567e-05, "loss": 7.0526, "step": 7450 }, { "epoch": 0.04814627994955864, "grad_norm": 0.1798352152109146, "learning_rate": 5.6925083433353774e-05, "loss": 7.0504, "step": 7460 }, { "epoch": 0.048398486759142496, "grad_norm": 0.22475120425224304, "learning_rate": 5.6914039450593994e-05, "loss": 7.0539, "step": 7470 }, { "epoch": 0.04865069356872635, "grad_norm": 0.1234198808670044, "learning_rate": 5.690297674564885e-05, "loss": 7.0443, "step": 7480 }, { "epoch": 0.048902900378310216, "grad_norm": 0.1423250436782837, "learning_rate": 5.689189532621389e-05, "loss": 7.0022, "step": 7490 }, { "epoch": 0.04915510718789407, "grad_norm": 0.1332167237997055, "learning_rate": 5.688079519999765e-05, "loss": 7.0018, "step": 7500 }, { "epoch": 0.049407313997477935, "grad_norm": 0.1508467048406601, "learning_rate": 5.686967637472172e-05, "loss": 7.0102, "step": 7510 }, { "epoch": 0.04965952080706179, "grad_norm": 0.14652985334396362, "learning_rate": 5.685853885812066e-05, "loss": 7.0032, "step": 7520 }, { "epoch": 0.04991172761664565, "grad_norm": 0.18656450510025024, "learning_rate": 5.684738265794206e-05, "loss": 7.0504, "step": 7530 }, { "epoch": 0.05016393442622951, "grad_norm": 0.17536795139312744, "learning_rate": 5.683620778194649e-05, "loss": 7.0337, "step": 7540 }, { "epoch": 0.050416141235813366, "grad_norm": 0.22041486203670502, "learning_rate": 5.682501423790753e-05, "loss": 7.0626, "step": 7550 }, { "epoch": 0.05066834804539722, "grad_norm": 0.22649215161800385, "learning_rate": 5.681380203361171e-05, "loss": 7.0741, "step": 7560 }, { "epoch": 0.050920554854981086, "grad_norm": 0.21708476543426514, "learning_rate": 5.6802571176858593e-05, "loss": 7.0742, "step": 7570 }, { "epoch": 0.05117276166456494, "grad_norm": 0.2067744880914688, "learning_rate": 5.6791321675460654e-05, "loss": 7.0723, "step": 7580 }, { "epoch": 0.051424968474148805, "grad_norm": 0.22095611691474915, "learning_rate": 5.678005353724341e-05, "loss": 7.061, "step": 7590 }, { "epoch": 0.05167717528373266, "grad_norm": 0.21755394339561462, "learning_rate": 5.676876677004527e-05, "loss": 7.0761, "step": 7600 }, { "epoch": 0.05192938209331652, "grad_norm": 0.225448340177536, "learning_rate": 5.6757461381717654e-05, "loss": 7.0787, "step": 7610 }, { "epoch": 0.05218158890290038, "grad_norm": 0.21042786538600922, "learning_rate": 5.674613738012492e-05, "loss": 7.0626, "step": 7620 }, { "epoch": 0.05243379571248424, "grad_norm": 0.17969447374343872, "learning_rate": 5.673479477314437e-05, "loss": 7.0607, "step": 7630 }, { "epoch": 0.05268600252206809, "grad_norm": 0.17723916471004486, "learning_rate": 5.672343356866623e-05, "loss": 7.0497, "step": 7640 }, { "epoch": 0.052938209331651956, "grad_norm": 0.1994636058807373, "learning_rate": 5.6712053774593714e-05, "loss": 7.058, "step": 7650 }, { "epoch": 0.05319041614123581, "grad_norm": 0.1918351948261261, "learning_rate": 5.670065539884291e-05, "loss": 7.0588, "step": 7660 }, { "epoch": 0.053442622950819675, "grad_norm": 0.17553338408470154, "learning_rate": 5.6689238449342884e-05, "loss": 7.0445, "step": 7670 }, { "epoch": 0.05369482976040353, "grad_norm": 0.2063758224248886, "learning_rate": 5.667780293403557e-05, "loss": 7.0511, "step": 7680 }, { "epoch": 0.05394703656998739, "grad_norm": 0.2032390683889389, "learning_rate": 5.6666348860875866e-05, "loss": 7.0573, "step": 7690 }, { "epoch": 0.05419924337957125, "grad_norm": 0.20917579531669617, "learning_rate": 5.665487623783153e-05, "loss": 7.0608, "step": 7700 }, { "epoch": 0.05445145018915511, "grad_norm": 0.20957161486148834, "learning_rate": 5.664338507288328e-05, "loss": 7.0643, "step": 7710 }, { "epoch": 0.05470365699873896, "grad_norm": 0.14790500700473785, "learning_rate": 5.663187537402469e-05, "loss": 7.0604, "step": 7720 }, { "epoch": 0.054955863808322826, "grad_norm": 0.17017485201358795, "learning_rate": 5.662034714926224e-05, "loss": 7.0158, "step": 7730 }, { "epoch": 0.05520807061790668, "grad_norm": 0.15934252738952637, "learning_rate": 5.6608800406615306e-05, "loss": 7.0205, "step": 7740 }, { "epoch": 0.055460277427490545, "grad_norm": 0.15800200402736664, "learning_rate": 5.659723515411612e-05, "loss": 7.0204, "step": 7750 }, { "epoch": 0.0557124842370744, "grad_norm": 0.16340184211730957, "learning_rate": 5.6585651399809826e-05, "loss": 7.02, "step": 7760 }, { "epoch": 0.05596469104665826, "grad_norm": 0.21474768221378326, "learning_rate": 5.6574049151754394e-05, "loss": 7.0696, "step": 7770 }, { "epoch": 0.05621689785624212, "grad_norm": 0.21832896769046783, "learning_rate": 5.656242841802071e-05, "loss": 7.0727, "step": 7780 }, { "epoch": 0.05646910466582598, "grad_norm": 0.21053597331047058, "learning_rate": 5.6550789206692476e-05, "loss": 7.07, "step": 7790 }, { "epoch": 0.05672131147540983, "grad_norm": 0.20995771884918213, "learning_rate": 5.653913152586627e-05, "loss": 7.053, "step": 7800 }, { "epoch": 0.056973518284993696, "grad_norm": 0.17079128324985504, "learning_rate": 5.6527455383651506e-05, "loss": 7.0585, "step": 7810 }, { "epoch": 0.05722572509457755, "grad_norm": 0.19060169160366058, "learning_rate": 5.6515760788170455e-05, "loss": 7.0537, "step": 7820 }, { "epoch": 0.057477931904161415, "grad_norm": 0.18240195512771606, "learning_rate": 5.650404774755821e-05, "loss": 7.0543, "step": 7830 }, { "epoch": 0.05773013871374527, "grad_norm": 0.15019267797470093, "learning_rate": 5.649231626996272e-05, "loss": 7.044, "step": 7840 }, { "epoch": 0.05798234552332913, "grad_norm": 0.2040311098098755, "learning_rate": 5.648056636354472e-05, "loss": 7.0556, "step": 7850 }, { "epoch": 0.05823455233291299, "grad_norm": 0.18561887741088867, "learning_rate": 5.6468798036477784e-05, "loss": 7.0514, "step": 7860 }, { "epoch": 0.05848675914249685, "grad_norm": 0.20358730852603912, "learning_rate": 5.645701129694832e-05, "loss": 7.0411, "step": 7870 }, { "epoch": 0.0587389659520807, "grad_norm": 0.20001614093780518, "learning_rate": 5.6445206153155506e-05, "loss": 7.0571, "step": 7880 }, { "epoch": 0.058991172761664566, "grad_norm": 0.1947861760854721, "learning_rate": 5.643338261331135e-05, "loss": 7.0609, "step": 7890 }, { "epoch": 0.05924337957124842, "grad_norm": 0.19761738181114197, "learning_rate": 5.642154068564065e-05, "loss": 7.0594, "step": 7900 }, { "epoch": 0.059495586380832285, "grad_norm": 0.15108056366443634, "learning_rate": 5.640968037838099e-05, "loss": 7.0436, "step": 7910 }, { "epoch": 0.05974779319041614, "grad_norm": 0.19151745736598969, "learning_rate": 5.6397801699782754e-05, "loss": 7.0426, "step": 7920 }, { "epoch": 0.06, "grad_norm": 0.18520492315292358, "learning_rate": 5.638590465810907e-05, "loss": 7.032, "step": 7930 }, { "epoch": 0.06025220680958386, "grad_norm": 0.19372153282165527, "learning_rate": 5.6373989261635896e-05, "loss": 7.0465, "step": 7940 }, { "epoch": 0.06050441361916772, "grad_norm": 0.19277332723140717, "learning_rate": 5.6362055518651905e-05, "loss": 7.0489, "step": 7950 }, { "epoch": 0.06075662042875157, "grad_norm": 0.1666538119316101, "learning_rate": 5.6350103437458564e-05, "loss": 7.0418, "step": 7960 }, { "epoch": 0.061008827238335436, "grad_norm": 0.20245084166526794, "learning_rate": 5.6338133026370086e-05, "loss": 7.0515, "step": 7970 }, { "epoch": 0.06126103404791929, "grad_norm": 0.21279408037662506, "learning_rate": 5.632614429371343e-05, "loss": 7.0689, "step": 7980 }, { "epoch": 0.061513240857503156, "grad_norm": 0.20148363709449768, "learning_rate": 5.631413724782831e-05, "loss": 7.0664, "step": 7990 }, { "epoch": 0.06176544766708701, "grad_norm": 0.20002469420433044, "learning_rate": 5.6302111897067184e-05, "loss": 7.0605, "step": 8000 }, { "epoch": 0.06201765447667087, "grad_norm": 0.19029393792152405, "learning_rate": 5.629006824979521e-05, "loss": 7.0557, "step": 8010 }, { "epoch": 0.06226986128625473, "grad_norm": 0.20180237293243408, "learning_rate": 5.6278006314390314e-05, "loss": 7.0651, "step": 8020 }, { "epoch": 0.06252206809583859, "grad_norm": 0.21171844005584717, "learning_rate": 5.626592609924314e-05, "loss": 7.0684, "step": 8030 }, { "epoch": 0.06277427490542245, "grad_norm": 0.21870754659175873, "learning_rate": 5.6253827612757004e-05, "loss": 7.0718, "step": 8040 }, { "epoch": 0.0630264817150063, "grad_norm": 0.20330548286437988, "learning_rate": 5.6241710863347996e-05, "loss": 7.0553, "step": 8050 }, { "epoch": 0.06327868852459016, "grad_norm": 0.1975962221622467, "learning_rate": 5.622957585944486e-05, "loss": 7.0628, "step": 8060 }, { "epoch": 0.06353089533417403, "grad_norm": 0.19782015681266785, "learning_rate": 5.621742260948906e-05, "loss": 7.0589, "step": 8070 }, { "epoch": 0.06378310214375787, "grad_norm": 0.20466746389865875, "learning_rate": 5.620525112193474e-05, "loss": 7.0621, "step": 8080 }, { "epoch": 0.06403530895334174, "grad_norm": 0.18516777455806732, "learning_rate": 5.6193061405248754e-05, "loss": 7.0548, "step": 8090 }, { "epoch": 0.0642875157629256, "grad_norm": 0.21205070614814758, "learning_rate": 5.618085346791061e-05, "loss": 7.064, "step": 8100 }, { "epoch": 0.06453972257250946, "grad_norm": 0.20627938210964203, "learning_rate": 5.616862731841251e-05, "loss": 7.071, "step": 8110 }, { "epoch": 0.06479192938209331, "grad_norm": 0.1907789409160614, "learning_rate": 5.615638296525931e-05, "loss": 7.0634, "step": 8120 }, { "epoch": 0.06504413619167718, "grad_norm": 0.19622276723384857, "learning_rate": 5.6144120416968555e-05, "loss": 7.0586, "step": 8130 }, { "epoch": 0.06529634300126104, "grad_norm": 0.17398089170455933, "learning_rate": 5.613183968207042e-05, "loss": 7.0364, "step": 8140 }, { "epoch": 0.06554854981084489, "grad_norm": 0.13863994181156158, "learning_rate": 5.611954076910772e-05, "loss": 7.023, "step": 8150 }, { "epoch": 0.06580075662042875, "grad_norm": 0.16009508073329926, "learning_rate": 5.6107223686635956e-05, "loss": 7.0095, "step": 8160 }, { "epoch": 0.06605296343001262, "grad_norm": 0.14189541339874268, "learning_rate": 5.609488844322325e-05, "loss": 7.0151, "step": 8170 }, { "epoch": 0.06630517023959646, "grad_norm": 0.14596349000930786, "learning_rate": 5.608253504745034e-05, "loss": 7.0137, "step": 8180 }, { "epoch": 0.06655737704918033, "grad_norm": 0.18430890142917633, "learning_rate": 5.607016350791061e-05, "loss": 7.0236, "step": 8190 }, { "epoch": 0.06680958385876419, "grad_norm": 0.2027202695608139, "learning_rate": 5.605777383321007e-05, "loss": 7.0484, "step": 8200 }, { "epoch": 0.06706179066834804, "grad_norm": 0.1944209188222885, "learning_rate": 5.6045366031967326e-05, "loss": 7.0602, "step": 8210 }, { "epoch": 0.0673139974779319, "grad_norm": 0.18172238767147064, "learning_rate": 5.603294011281362e-05, "loss": 7.0539, "step": 8220 }, { "epoch": 0.06756620428751577, "grad_norm": 0.19313764572143555, "learning_rate": 5.6020496084392765e-05, "loss": 7.0539, "step": 8230 }, { "epoch": 0.06781841109709962, "grad_norm": 0.20719696581363678, "learning_rate": 5.6008033955361184e-05, "loss": 7.0585, "step": 8240 }, { "epoch": 0.06807061790668348, "grad_norm": 0.19512107968330383, "learning_rate": 5.5995553734387914e-05, "loss": 7.0701, "step": 8250 }, { "epoch": 0.06832282471626734, "grad_norm": 0.17552125453948975, "learning_rate": 5.5983055430154535e-05, "loss": 7.0529, "step": 8260 }, { "epoch": 0.0685750315258512, "grad_norm": 0.17933215200901031, "learning_rate": 5.597053905135525e-05, "loss": 7.0492, "step": 8270 }, { "epoch": 0.06882723833543505, "grad_norm": 0.184284508228302, "learning_rate": 5.595800460669681e-05, "loss": 7.0532, "step": 8280 }, { "epoch": 0.06907944514501892, "grad_norm": 0.20368865132331848, "learning_rate": 5.5945452104898536e-05, "loss": 7.0635, "step": 8290 }, { "epoch": 0.06933165195460278, "grad_norm": 0.19298294186592102, "learning_rate": 5.59328815546923e-05, "loss": 7.0615, "step": 8300 }, { "epoch": 0.06958385876418663, "grad_norm": 0.1994616836309433, "learning_rate": 5.592029296482256e-05, "loss": 7.0521, "step": 8310 }, { "epoch": 0.06983606557377049, "grad_norm": 0.20446720719337463, "learning_rate": 5.590768634404629e-05, "loss": 7.0649, "step": 8320 }, { "epoch": 0.07008827238335436, "grad_norm": 0.20642350614070892, "learning_rate": 5.5895061701133026e-05, "loss": 7.0608, "step": 8330 }, { "epoch": 0.0703404791929382, "grad_norm": 0.19710470736026764, "learning_rate": 5.588241904486484e-05, "loss": 7.061, "step": 8340 }, { "epoch": 0.07059268600252207, "grad_norm": 0.2008451223373413, "learning_rate": 5.586975838403631e-05, "loss": 7.0582, "step": 8350 }, { "epoch": 0.07084489281210593, "grad_norm": 0.16910766065120697, "learning_rate": 5.585707972745458e-05, "loss": 7.0553, "step": 8360 }, { "epoch": 0.07109709962168978, "grad_norm": 0.1689150184392929, "learning_rate": 5.584438308393929e-05, "loss": 7.0346, "step": 8370 }, { "epoch": 0.07134930643127364, "grad_norm": 0.1668652892112732, "learning_rate": 5.5831668462322585e-05, "loss": 7.0442, "step": 8380 }, { "epoch": 0.0716015132408575, "grad_norm": 0.19506695866584778, "learning_rate": 5.581893587144912e-05, "loss": 7.0552, "step": 8390 }, { "epoch": 0.07185372005044136, "grad_norm": 0.19236227869987488, "learning_rate": 5.580618532017606e-05, "loss": 7.0568, "step": 8400 }, { "epoch": 0.07210592686002522, "grad_norm": 0.1918887495994568, "learning_rate": 5.579341681737306e-05, "loss": 7.0546, "step": 8410 }, { "epoch": 0.07235813366960908, "grad_norm": 0.14888505637645721, "learning_rate": 5.578063037192226e-05, "loss": 7.0398, "step": 8420 }, { "epoch": 0.07261034047919294, "grad_norm": 0.15782593190670013, "learning_rate": 5.576782599271827e-05, "loss": 7.0498, "step": 8430 }, { "epoch": 0.0728625472887768, "grad_norm": 0.1646624654531479, "learning_rate": 5.5755003688668204e-05, "loss": 7.0281, "step": 8440 }, { "epoch": 0.07311475409836066, "grad_norm": 0.18371976912021637, "learning_rate": 5.574216346869163e-05, "loss": 7.0291, "step": 8450 }, { "epoch": 0.07336696090794452, "grad_norm": 0.2101437896490097, "learning_rate": 5.5729305341720544e-05, "loss": 7.0564, "step": 8460 }, { "epoch": 0.07361916771752837, "grad_norm": 0.18799065053462982, "learning_rate": 5.571642931669948e-05, "loss": 7.0612, "step": 8470 }, { "epoch": 0.07387137452711223, "grad_norm": 0.18636606633663177, "learning_rate": 5.5703535402585336e-05, "loss": 7.0548, "step": 8480 }, { "epoch": 0.0741235813366961, "grad_norm": 0.19167423248291016, "learning_rate": 5.569062360834751e-05, "loss": 7.0585, "step": 8490 }, { "epoch": 0.07437578814627994, "grad_norm": 0.1682589203119278, "learning_rate": 5.56776939429678e-05, "loss": 7.0488, "step": 8500 }, { "epoch": 0.07462799495586381, "grad_norm": 0.18884754180908203, "learning_rate": 5.5664746415440484e-05, "loss": 7.0426, "step": 8510 }, { "epoch": 0.07488020176544767, "grad_norm": 0.18220815062522888, "learning_rate": 5.565178103477222e-05, "loss": 7.0516, "step": 8520 }, { "epoch": 0.07513240857503152, "grad_norm": 0.17655254900455475, "learning_rate": 5.563879780998211e-05, "loss": 7.0535, "step": 8530 }, { "epoch": 0.07538461538461538, "grad_norm": 0.1764201819896698, "learning_rate": 5.5625796750101656e-05, "loss": 7.0502, "step": 8540 }, { "epoch": 0.07563682219419925, "grad_norm": 0.19626182317733765, "learning_rate": 5.5612777864174774e-05, "loss": 7.0476, "step": 8550 }, { "epoch": 0.0758890290037831, "grad_norm": 0.17213013768196106, "learning_rate": 5.559974116125778e-05, "loss": 7.0473, "step": 8560 }, { "epoch": 0.07614123581336696, "grad_norm": 0.18395720422267914, "learning_rate": 5.558668665041937e-05, "loss": 7.0492, "step": 8570 }, { "epoch": 0.07639344262295082, "grad_norm": 0.1431710124015808, "learning_rate": 5.557361434074066e-05, "loss": 7.0318, "step": 8580 }, { "epoch": 0.07664564943253468, "grad_norm": 0.18209077417850494, "learning_rate": 5.556052424131511e-05, "loss": 7.0463, "step": 8590 }, { "epoch": 0.07689785624211853, "grad_norm": 0.16840142011642456, "learning_rate": 5.5547416361248574e-05, "loss": 7.0389, "step": 8600 }, { "epoch": 0.0771500630517024, "grad_norm": 0.14904600381851196, "learning_rate": 5.5534290709659294e-05, "loss": 7.0175, "step": 8610 }, { "epoch": 0.07740226986128626, "grad_norm": 0.1535763293504715, "learning_rate": 5.5521147295677824e-05, "loss": 7.0174, "step": 8620 }, { "epoch": 0.07765447667087011, "grad_norm": 0.1534355878829956, "learning_rate": 5.5507986128447125e-05, "loss": 7.0163, "step": 8630 }, { "epoch": 0.07790668348045397, "grad_norm": 0.14829765260219574, "learning_rate": 5.549480721712247e-05, "loss": 7.0166, "step": 8640 }, { "epoch": 0.07815889029003784, "grad_norm": 0.17000535130500793, "learning_rate": 5.548161057087151e-05, "loss": 7.0159, "step": 8650 }, { "epoch": 0.07841109709962168, "grad_norm": 0.17461834847927094, "learning_rate": 5.5468396198874196e-05, "loss": 7.0436, "step": 8660 }, { "epoch": 0.07866330390920555, "grad_norm": 0.20149646699428558, "learning_rate": 5.5455164110322835e-05, "loss": 7.0542, "step": 8670 }, { "epoch": 0.07891551071878941, "grad_norm": 0.18285039067268372, "learning_rate": 5.544191431442205e-05, "loss": 7.0566, "step": 8680 }, { "epoch": 0.07916771752837326, "grad_norm": 0.19492173194885254, "learning_rate": 5.5428646820388786e-05, "loss": 7.0591, "step": 8690 }, { "epoch": 0.07941992433795712, "grad_norm": 0.1813765913248062, "learning_rate": 5.5415361637452295e-05, "loss": 7.0537, "step": 8700 }, { "epoch": 0.07967213114754099, "grad_norm": 0.14669491350650787, "learning_rate": 5.540205877485415e-05, "loss": 7.0344, "step": 8710 }, { "epoch": 0.07992433795712484, "grad_norm": 0.16920779645442963, "learning_rate": 5.538873824184817e-05, "loss": 7.0293, "step": 8720 }, { "epoch": 0.0801765447667087, "grad_norm": 0.16444742679595947, "learning_rate": 5.537540004770053e-05, "loss": 7.033, "step": 8730 }, { "epoch": 0.08042875157629256, "grad_norm": 0.18776105344295502, "learning_rate": 5.536204420168966e-05, "loss": 7.0458, "step": 8740 }, { "epoch": 0.08068095838587643, "grad_norm": 0.17036353051662445, "learning_rate": 5.534867071310628e-05, "loss": 7.0422, "step": 8750 }, { "epoch": 0.08093316519546027, "grad_norm": 0.1529814600944519, "learning_rate": 5.5335279591253366e-05, "loss": 7.0385, "step": 8760 }, { "epoch": 0.08118537200504414, "grad_norm": 0.17665208876132965, "learning_rate": 5.532187084544617e-05, "loss": 7.052, "step": 8770 }, { "epoch": 0.081437578814628, "grad_norm": 0.19775933027267456, "learning_rate": 5.530844448501221e-05, "loss": 7.0515, "step": 8780 }, { "epoch": 0.08168978562421185, "grad_norm": 0.18007896840572357, "learning_rate": 5.529500051929126e-05, "loss": 7.0478, "step": 8790 }, { "epoch": 0.08194199243379571, "grad_norm": 0.17311082780361176, "learning_rate": 5.5281538957635304e-05, "loss": 7.0465, "step": 8800 }, { "epoch": 0.08219419924337958, "grad_norm": 0.19909489154815674, "learning_rate": 5.526805980940861e-05, "loss": 7.0531, "step": 8810 }, { "epoch": 0.08244640605296343, "grad_norm": 0.19437648355960846, "learning_rate": 5.525456308398768e-05, "loss": 7.0586, "step": 8820 }, { "epoch": 0.08269861286254729, "grad_norm": 0.18446266651153564, "learning_rate": 5.5241048790761194e-05, "loss": 7.0585, "step": 8830 }, { "epoch": 0.08295081967213115, "grad_norm": 0.18008007109165192, "learning_rate": 5.522751693913012e-05, "loss": 7.0431, "step": 8840 }, { "epoch": 0.083203026481715, "grad_norm": 0.18396054208278656, "learning_rate": 5.521396753850757e-05, "loss": 7.0497, "step": 8850 }, { "epoch": 0.08345523329129886, "grad_norm": 0.19769428670406342, "learning_rate": 5.5200400598318935e-05, "loss": 7.0555, "step": 8860 }, { "epoch": 0.08370744010088273, "grad_norm": 0.1954563707113266, "learning_rate": 5.518681612800175e-05, "loss": 7.0616, "step": 8870 }, { "epoch": 0.08395964691046658, "grad_norm": 0.17377570271492004, "learning_rate": 5.517321413700578e-05, "loss": 7.0484, "step": 8880 }, { "epoch": 0.08421185372005044, "grad_norm": 0.1938530057668686, "learning_rate": 5.5159594634792935e-05, "loss": 7.0551, "step": 8890 }, { "epoch": 0.0844640605296343, "grad_norm": 0.17689922451972961, "learning_rate": 5.514595763083737e-05, "loss": 7.0532, "step": 8900 }, { "epoch": 0.08471626733921817, "grad_norm": 0.169033944606781, "learning_rate": 5.513230313462536e-05, "loss": 7.0425, "step": 8910 }, { "epoch": 0.08496847414880201, "grad_norm": 0.18733908236026764, "learning_rate": 5.511863115565537e-05, "loss": 7.0454, "step": 8920 }, { "epoch": 0.08522068095838588, "grad_norm": 0.1908901035785675, "learning_rate": 5.5104941703438034e-05, "loss": 7.0547, "step": 8930 }, { "epoch": 0.08547288776796974, "grad_norm": 0.1782771646976471, "learning_rate": 5.5091234787496125e-05, "loss": 7.0486, "step": 8940 }, { "epoch": 0.08572509457755359, "grad_norm": 0.13016663491725922, "learning_rate": 5.507751041736458e-05, "loss": 7.0291, "step": 8950 }, { "epoch": 0.08597730138713745, "grad_norm": 0.12580156326293945, "learning_rate": 5.506376860259045e-05, "loss": 7.0075, "step": 8960 }, { "epoch": 0.08622950819672132, "grad_norm": 0.09653211385011673, "learning_rate": 5.505000935273295e-05, "loss": 6.9889, "step": 8970 }, { "epoch": 0.08648171500630517, "grad_norm": 0.13552798330783844, "learning_rate": 5.503623267736341e-05, "loss": 7.0074, "step": 8980 }, { "epoch": 0.08673392181588903, "grad_norm": 0.12972988188266754, "learning_rate": 5.502243858606529e-05, "loss": 6.9994, "step": 8990 }, { "epoch": 0.08698612862547289, "grad_norm": 0.16716577112674713, "learning_rate": 5.500862708843415e-05, "loss": 7.0256, "step": 9000 }, { "epoch": 0.08723833543505674, "grad_norm": 0.18860507011413574, "learning_rate": 5.499479819407768e-05, "loss": 7.0512, "step": 9010 }, { "epoch": 0.0874905422446406, "grad_norm": 0.17564667761325836, "learning_rate": 5.4980951912615646e-05, "loss": 7.0453, "step": 9020 }, { "epoch": 0.08774274905422447, "grad_norm": 0.1747795194387436, "learning_rate": 5.4967088253679935e-05, "loss": 7.0443, "step": 9030 }, { "epoch": 0.08799495586380832, "grad_norm": 0.17015725374221802, "learning_rate": 5.49532072269145e-05, "loss": 7.0497, "step": 9040 }, { "epoch": 0.08824716267339218, "grad_norm": 0.19313786923885345, "learning_rate": 5.493930884197539e-05, "loss": 7.0464, "step": 9050 }, { "epoch": 0.08849936948297604, "grad_norm": 0.17507658898830414, "learning_rate": 5.492539310853072e-05, "loss": 7.0481, "step": 9060 }, { "epoch": 0.0887515762925599, "grad_norm": 0.19275647401809692, "learning_rate": 5.491146003626069e-05, "loss": 7.0556, "step": 9070 }, { "epoch": 0.08900378310214375, "grad_norm": 0.19791698455810547, "learning_rate": 5.4897509634857535e-05, "loss": 7.0575, "step": 9080 }, { "epoch": 0.08925598991172762, "grad_norm": 0.19396840035915375, "learning_rate": 5.488354191402556e-05, "loss": 7.0594, "step": 9090 }, { "epoch": 0.08950819672131148, "grad_norm": 0.19874662160873413, "learning_rate": 5.4869556883481114e-05, "loss": 7.0619, "step": 9100 }, { "epoch": 0.08976040353089533, "grad_norm": 0.11861883848905563, "learning_rate": 5.48555545529526e-05, "loss": 7.0436, "step": 9110 }, { "epoch": 0.0900126103404792, "grad_norm": 0.12202567607164383, "learning_rate": 5.484153493218043e-05, "loss": 6.9947, "step": 9120 }, { "epoch": 0.09026481715006306, "grad_norm": 0.12041110545396805, "learning_rate": 5.4827498030917086e-05, "loss": 7.0036, "step": 9130 }, { "epoch": 0.0905170239596469, "grad_norm": 0.13264809548854828, "learning_rate": 5.481344385892701e-05, "loss": 7.0025, "step": 9140 }, { "epoch": 0.09076923076923077, "grad_norm": 0.14944374561309814, "learning_rate": 5.47993724259867e-05, "loss": 7.0073, "step": 9150 }, { "epoch": 0.09102143757881463, "grad_norm": 0.183827742934227, "learning_rate": 5.478528374188466e-05, "loss": 7.0546, "step": 9160 }, { "epoch": 0.09127364438839848, "grad_norm": 0.19227850437164307, "learning_rate": 5.477117781642139e-05, "loss": 7.0549, "step": 9170 }, { "epoch": 0.09152585119798234, "grad_norm": 0.182961568236351, "learning_rate": 5.475705465940937e-05, "loss": 7.0539, "step": 9180 }, { "epoch": 0.09177805800756621, "grad_norm": 0.19388459622859955, "learning_rate": 5.474291428067309e-05, "loss": 7.0492, "step": 9190 }, { "epoch": 0.09203026481715006, "grad_norm": 0.184714213013649, "learning_rate": 5.472875669004899e-05, "loss": 7.0505, "step": 9200 }, { "epoch": 0.09228247162673392, "grad_norm": 0.18527236580848694, "learning_rate": 5.4714581897385506e-05, "loss": 7.0529, "step": 9210 }, { "epoch": 0.09253467843631778, "grad_norm": 0.14500992000102997, "learning_rate": 5.470038991254304e-05, "loss": 7.0407, "step": 9220 }, { "epoch": 0.09278688524590165, "grad_norm": 0.15856777131557465, "learning_rate": 5.4686180745393944e-05, "loss": 7.0245, "step": 9230 }, { "epoch": 0.0930390920554855, "grad_norm": 0.1520087867975235, "learning_rate": 5.4671954405822526e-05, "loss": 7.0256, "step": 9240 }, { "epoch": 0.09329129886506936, "grad_norm": 0.18383564054965973, "learning_rate": 5.465771090372504e-05, "loss": 7.042, "step": 9250 }, { "epoch": 0.09354350567465322, "grad_norm": 0.168827086687088, "learning_rate": 5.4643450249009685e-05, "loss": 7.0384, "step": 9260 }, { "epoch": 0.09379571248423707, "grad_norm": 0.17891252040863037, "learning_rate": 5.462917245159658e-05, "loss": 7.0446, "step": 9270 }, { "epoch": 0.09404791929382093, "grad_norm": 0.18723298609256744, "learning_rate": 5.461487752141776e-05, "loss": 7.0502, "step": 9280 }, { "epoch": 0.0943001261034048, "grad_norm": 0.18257226049900055, "learning_rate": 5.460056546841722e-05, "loss": 7.0516, "step": 9290 }, { "epoch": 0.09455233291298865, "grad_norm": 0.17528903484344482, "learning_rate": 5.458623630255082e-05, "loss": 7.0551, "step": 9300 }, { "epoch": 0.09480453972257251, "grad_norm": 0.1738893985748291, "learning_rate": 5.457189003378635e-05, "loss": 7.0447, "step": 9310 }, { "epoch": 0.09505674653215637, "grad_norm": 0.1582866907119751, "learning_rate": 5.4557526672103485e-05, "loss": 7.0406, "step": 9320 }, { "epoch": 0.09530895334174022, "grad_norm": 0.18868392705917358, "learning_rate": 5.45431462274938e-05, "loss": 7.0369, "step": 9330 }, { "epoch": 0.09556116015132408, "grad_norm": 0.1830597221851349, "learning_rate": 5.4528748709960746e-05, "loss": 7.0516, "step": 9340 }, { "epoch": 0.09581336696090795, "grad_norm": 0.16310523450374603, "learning_rate": 5.451433412951966e-05, "loss": 7.0429, "step": 9350 }, { "epoch": 0.0960655737704918, "grad_norm": 0.18104340136051178, "learning_rate": 5.449990249619773e-05, "loss": 7.0396, "step": 9360 }, { "epoch": 0.09631778058007566, "grad_norm": 0.1832277923822403, "learning_rate": 5.448545382003402e-05, "loss": 7.0539, "step": 9370 }, { "epoch": 0.09656998738965952, "grad_norm": 0.18179331719875336, "learning_rate": 5.447098811107946e-05, "loss": 7.0556, "step": 9380 }, { "epoch": 0.09682219419924339, "grad_norm": 0.16805732250213623, "learning_rate": 5.44565053793968e-05, "loss": 7.0495, "step": 9390 }, { "epoch": 0.09707440100882724, "grad_norm": 0.1774771362543106, "learning_rate": 5.444200563506066e-05, "loss": 7.0448, "step": 9400 }, { "epoch": 0.0973266078184111, "grad_norm": 0.17557114362716675, "learning_rate": 5.442748888815748e-05, "loss": 7.042, "step": 9410 }, { "epoch": 0.09757881462799496, "grad_norm": 0.1357637196779251, "learning_rate": 5.4412955148785524e-05, "loss": 7.0265, "step": 9420 }, { "epoch": 0.09783102143757881, "grad_norm": 0.16532544791698456, "learning_rate": 5.4398404427054884e-05, "loss": 7.0382, "step": 9430 }, { "epoch": 0.09808322824716267, "grad_norm": 0.1613602638244629, "learning_rate": 5.4383836733087464e-05, "loss": 7.0316, "step": 9440 }, { "epoch": 0.09833543505674654, "grad_norm": 0.18105310201644897, "learning_rate": 5.436925207701698e-05, "loss": 7.0308, "step": 9450 }, { "epoch": 0.09858764186633039, "grad_norm": 0.18518279492855072, "learning_rate": 5.435465046898893e-05, "loss": 7.0531, "step": 9460 }, { "epoch": 0.09883984867591425, "grad_norm": 0.17499639093875885, "learning_rate": 5.434003191916062e-05, "loss": 7.0548, "step": 9470 }, { "epoch": 0.09909205548549811, "grad_norm": 0.1828228235244751, "learning_rate": 5.432539643770114e-05, "loss": 7.0511, "step": 9480 }, { "epoch": 0.09934426229508196, "grad_norm": 0.16828420758247375, "learning_rate": 5.431074403479135e-05, "loss": 7.0507, "step": 9490 }, { "epoch": 0.09959646910466582, "grad_norm": 0.16204184293746948, "learning_rate": 5.42960747206239e-05, "loss": 7.0325, "step": 9500 }, { "epoch": 0.00025220680958385876, "grad_norm": 0.1891125738620758, "learning_rate": 5.4281388505403166e-05, "loss": 7.0534, "step": 9510 }, { "epoch": 0.0005044136191677175, "grad_norm": 0.18200230598449707, "learning_rate": 5.4266685399345324e-05, "loss": 7.0533, "step": 9520 }, { "epoch": 0.0007566204287515763, "grad_norm": 0.1826929897069931, "learning_rate": 5.4251965412678266e-05, "loss": 7.0494, "step": 9530 }, { "epoch": 0.001008827238335435, "grad_norm": 0.18874965608119965, "learning_rate": 5.423722855564165e-05, "loss": 7.0496, "step": 9540 }, { "epoch": 0.0012610340479192938, "grad_norm": 0.16083575785160065, "learning_rate": 5.4222474838486866e-05, "loss": 7.0397, "step": 9550 }, { "epoch": 0.0015132408575031526, "grad_norm": 0.16287243366241455, "learning_rate": 5.420770427147702e-05, "loss": 7.0346, "step": 9560 }, { "epoch": 0.0017654476670870113, "grad_norm": 0.15673024952411652, "learning_rate": 5.419291686488694e-05, "loss": 7.0271, "step": 9570 }, { "epoch": 0.00201765447667087, "grad_norm": 0.16979970037937164, "learning_rate": 5.417811262900318e-05, "loss": 7.0473, "step": 9580 }, { "epoch": 0.002269861286254729, "grad_norm": 0.16276238858699799, "learning_rate": 5.4163291574124006e-05, "loss": 7.049, "step": 9590 }, { "epoch": 0.0025220680958385876, "grad_norm": 0.16708910465240479, "learning_rate": 5.414845371055935e-05, "loss": 7.0413, "step": 9600 }, { "epoch": 0.0027742749054224464, "grad_norm": 0.1773625761270523, "learning_rate": 5.4133599048630876e-05, "loss": 7.0445, "step": 9610 }, { "epoch": 0.003026481715006305, "grad_norm": 0.1531314253807068, "learning_rate": 5.411872759867192e-05, "loss": 7.0362, "step": 9620 }, { "epoch": 0.003278688524590164, "grad_norm": 0.17062915861606598, "learning_rate": 5.410383937102748e-05, "loss": 7.0375, "step": 9630 }, { "epoch": 0.0035308953341740227, "grad_norm": 0.18189162015914917, "learning_rate": 5.408893437605424e-05, "loss": 7.0455, "step": 9640 }, { "epoch": 0.0037831021437578815, "grad_norm": 0.1829890012741089, "learning_rate": 5.4074012624120564e-05, "loss": 7.0558, "step": 9650 }, { "epoch": 0.00403530895334174, "grad_norm": 0.18110725283622742, "learning_rate": 5.4059074125606436e-05, "loss": 7.0558, "step": 9660 }, { "epoch": 0.004287515762925599, "grad_norm": 0.1846550852060318, "learning_rate": 5.404411889090351e-05, "loss": 7.0572, "step": 9670 }, { "epoch": 0.004539722572509458, "grad_norm": 0.17940972745418549, "learning_rate": 5.402914693041509e-05, "loss": 7.0549, "step": 9680 }, { "epoch": 0.0047919293820933165, "grad_norm": 0.18011200428009033, "learning_rate": 5.40141582545561e-05, "loss": 7.052, "step": 9690 }, { "epoch": 0.005044136191677175, "grad_norm": 0.18187972903251648, "learning_rate": 5.39991528737531e-05, "loss": 7.0499, "step": 9700 }, { "epoch": 0.005296343001261034, "grad_norm": 0.19221895933151245, "learning_rate": 5.398413079844426e-05, "loss": 7.0577, "step": 9710 }, { "epoch": 0.005548549810844893, "grad_norm": 0.1502656787633896, "learning_rate": 5.396909203907938e-05, "loss": 7.046, "step": 9720 }, { "epoch": 0.005800756620428752, "grad_norm": 0.15707343816757202, "learning_rate": 5.395403660611984e-05, "loss": 7.0272, "step": 9730 }, { "epoch": 0.00605296343001261, "grad_norm": 0.13823489844799042, "learning_rate": 5.393896451003867e-05, "loss": 7.025, "step": 9740 }, { "epoch": 0.006305170239596469, "grad_norm": 0.15775422751903534, "learning_rate": 5.3923875761320415e-05, "loss": 7.0282, "step": 9750 }, { "epoch": 0.006557377049180328, "grad_norm": 0.17360910773277283, "learning_rate": 5.390877037046127e-05, "loss": 7.0381, "step": 9760 }, { "epoch": 0.006809583858764187, "grad_norm": 0.1680065244436264, "learning_rate": 5.389364834796898e-05, "loss": 7.0434, "step": 9770 }, { "epoch": 0.007061790668348045, "grad_norm": 0.1697879433631897, "learning_rate": 5.387850970436285e-05, "loss": 7.0452, "step": 9780 }, { "epoch": 0.007313997477931904, "grad_norm": 0.17972233891487122, "learning_rate": 5.386335445017378e-05, "loss": 7.0457, "step": 9790 }, { "epoch": 0.007566204287515763, "grad_norm": 0.1713174730539322, "learning_rate": 5.384818259594418e-05, "loss": 7.0332, "step": 9800 }, { "epoch": 0.007818411097099623, "grad_norm": 0.1606176346540451, "learning_rate": 5.383299415222805e-05, "loss": 7.0345, "step": 9810 }, { "epoch": 0.00807061790668348, "grad_norm": 0.1641971617937088, "learning_rate": 5.381778912959089e-05, "loss": 7.0401, "step": 9820 }, { "epoch": 0.00832282471626734, "grad_norm": 0.17109297215938568, "learning_rate": 5.380256753860979e-05, "loss": 7.039, "step": 9830 }, { "epoch": 0.008575031525851198, "grad_norm": 0.168721541762352, "learning_rate": 5.37873293898733e-05, "loss": 7.0463, "step": 9840 }, { "epoch": 0.008827238335435058, "grad_norm": 0.1647573858499527, "learning_rate": 5.3772074693981516e-05, "loss": 7.0394, "step": 9850 }, { "epoch": 0.009079445145018916, "grad_norm": 0.1569942831993103, "learning_rate": 5.3756803461546066e-05, "loss": 7.0367, "step": 9860 }, { "epoch": 0.009331651954602775, "grad_norm": 0.14562058448791504, "learning_rate": 5.374151570319004e-05, "loss": 7.032, "step": 9870 }, { "epoch": 0.009583858764186633, "grad_norm": 0.15443553030490875, "learning_rate": 5.372621142954807e-05, "loss": 7.0353, "step": 9880 }, { "epoch": 0.009836065573770493, "grad_norm": 0.15953265130519867, "learning_rate": 5.3710890651266225e-05, "loss": 7.0378, "step": 9890 }, { "epoch": 0.01008827238335435, "grad_norm": 0.1493910700082779, "learning_rate": 5.369555337900211e-05, "loss": 7.0355, "step": 9900 }, { "epoch": 0.01034047919293821, "grad_norm": 0.16823993623256683, "learning_rate": 5.3680199623424745e-05, "loss": 7.0356, "step": 9910 }, { "epoch": 0.010592686002522068, "grad_norm": 0.17981018126010895, "learning_rate": 5.366482939521467e-05, "loss": 7.0503, "step": 9920 }, { "epoch": 0.010844892812105928, "grad_norm": 0.17319153249263763, "learning_rate": 5.3649442705063866e-05, "loss": 7.0517, "step": 9930 }, { "epoch": 0.011097099621689786, "grad_norm": 0.17082861065864563, "learning_rate": 5.363403956367574e-05, "loss": 7.0488, "step": 9940 }, { "epoch": 0.011349306431273645, "grad_norm": 0.157545268535614, "learning_rate": 5.361861998176518e-05, "loss": 7.0319, "step": 9950 }, { "epoch": 0.011601513240857503, "grad_norm": 0.1552986055612564, "learning_rate": 5.3603183970058475e-05, "loss": 7.0326, "step": 9960 }, { "epoch": 0.011853720050441363, "grad_norm": 0.17059825360774994, "learning_rate": 5.3587731539293375e-05, "loss": 7.0403, "step": 9970 }, { "epoch": 0.01210592686002522, "grad_norm": 0.13770897686481476, "learning_rate": 5.3572262700219035e-05, "loss": 7.0321, "step": 9980 }, { "epoch": 0.01235813366960908, "grad_norm": 0.1417868584394455, "learning_rate": 5.3556777463596025e-05, "loss": 7.0204, "step": 9990 }, { "epoch": 0.012610340479192938, "grad_norm": 0.16359160840511322, "learning_rate": 5.3541275840196334e-05, "loss": 7.0249, "step": 10000 }, { "epoch": 0.012862547288776798, "grad_norm": 0.1601666361093521, "learning_rate": 5.352575784080334e-05, "loss": 7.0402, "step": 10010 }, { "epoch": 0.013114754098360656, "grad_norm": 0.16985160112380981, "learning_rate": 5.35102234762118e-05, "loss": 7.0337, "step": 10020 }, { "epoch": 0.013366960907944515, "grad_norm": 0.17120489478111267, "learning_rate": 5.3494672757227876e-05, "loss": 7.0368, "step": 10030 }, { "epoch": 0.013619167717528373, "grad_norm": 0.18267200887203217, "learning_rate": 5.3479105694669096e-05, "loss": 7.0464, "step": 10040 }, { "epoch": 0.013871374527112233, "grad_norm": 0.1634196639060974, "learning_rate": 5.346352229936437e-05, "loss": 7.0472, "step": 10050 }, { "epoch": 0.01412358133669609, "grad_norm": 0.1714300513267517, "learning_rate": 5.3447922582153934e-05, "loss": 7.0406, "step": 10060 }, { "epoch": 0.01437578814627995, "grad_norm": 0.1784055233001709, "learning_rate": 5.343230655388942e-05, "loss": 7.0473, "step": 10070 }, { "epoch": 0.014627994955863808, "grad_norm": 0.14910347759723663, "learning_rate": 5.34166742254338e-05, "loss": 7.0401, "step": 10080 }, { "epoch": 0.014880201765447668, "grad_norm": 0.14360199868679047, "learning_rate": 5.340102560766137e-05, "loss": 7.0296, "step": 10090 }, { "epoch": 0.015132408575031526, "grad_norm": 0.14652089774608612, "learning_rate": 5.338536071145773e-05, "loss": 7.023, "step": 10100 }, { "epoch": 0.015384615384615385, "grad_norm": 0.14787894487380981, "learning_rate": 5.336967954771988e-05, "loss": 7.0258, "step": 10110 }, { "epoch": 0.015636822194199245, "grad_norm": 0.1559966802597046, "learning_rate": 5.335398212735606e-05, "loss": 7.0353, "step": 10120 }, { "epoch": 0.0158890290037831, "grad_norm": 0.16122739017009735, "learning_rate": 5.333826846128587e-05, "loss": 7.0395, "step": 10130 }, { "epoch": 0.01614123581336696, "grad_norm": 0.16447995603084564, "learning_rate": 5.3322538560440183e-05, "loss": 7.0423, "step": 10140 }, { "epoch": 0.01639344262295082, "grad_norm": 0.15936128795146942, "learning_rate": 5.3306792435761176e-05, "loss": 7.0416, "step": 10150 }, { "epoch": 0.01664564943253468, "grad_norm": 0.18220850825309753, "learning_rate": 5.329103009820231e-05, "loss": 7.0481, "step": 10160 }, { "epoch": 0.016897856242118536, "grad_norm": 0.1584678292274475, "learning_rate": 5.32752515587283e-05, "loss": 7.0473, "step": 10170 }, { "epoch": 0.017150063051702396, "grad_norm": 0.17804084718227386, "learning_rate": 5.32594568283152e-05, "loss": 7.0474, "step": 10180 }, { "epoch": 0.017402269861286256, "grad_norm": 0.17320683598518372, "learning_rate": 5.3243645917950255e-05, "loss": 7.0503, "step": 10190 }, { "epoch": 0.017654476670870115, "grad_norm": 0.17593154311180115, "learning_rate": 5.322781883863199e-05, "loss": 7.0462, "step": 10200 }, { "epoch": 0.01790668348045397, "grad_norm": 0.1666613668203354, "learning_rate": 5.321197560137018e-05, "loss": 7.0431, "step": 10210 }, { "epoch": 0.01815889029003783, "grad_norm": 0.1775873601436615, "learning_rate": 5.3196116217185855e-05, "loss": 7.0484, "step": 10220 }, { "epoch": 0.01841109709962169, "grad_norm": 0.16698263585567474, "learning_rate": 5.318024069711124e-05, "loss": 7.0461, "step": 10230 }, { "epoch": 0.01866330390920555, "grad_norm": 0.17282380163669586, "learning_rate": 5.316434905218983e-05, "loss": 7.0487, "step": 10240 }, { "epoch": 0.018915510718789406, "grad_norm": 0.14828334748744965, "learning_rate": 5.314844129347629e-05, "loss": 7.0322, "step": 10250 }, { "epoch": 0.019167717528373266, "grad_norm": 0.1492428034543991, "learning_rate": 5.3132517432036534e-05, "loss": 7.031, "step": 10260 }, { "epoch": 0.019419924337957126, "grad_norm": 0.1814681887626648, "learning_rate": 5.311657747894766e-05, "loss": 7.051, "step": 10270 }, { "epoch": 0.019672131147540985, "grad_norm": 0.1618969589471817, "learning_rate": 5.3100621445297955e-05, "loss": 7.0411, "step": 10280 }, { "epoch": 0.01992433795712484, "grad_norm": 0.14833742380142212, "learning_rate": 5.30846493421869e-05, "loss": 7.0276, "step": 10290 }, { "epoch": 0.0201765447667087, "grad_norm": 0.15357738733291626, "learning_rate": 5.306866118072516e-05, "loss": 7.0254, "step": 10300 }, { "epoch": 0.02042875157629256, "grad_norm": 0.1773490607738495, "learning_rate": 5.3052656972034553e-05, "loss": 7.0384, "step": 10310 }, { "epoch": 0.02068095838587642, "grad_norm": 0.14568960666656494, "learning_rate": 5.303663672724807e-05, "loss": 7.0322, "step": 10320 }, { "epoch": 0.020933165195460277, "grad_norm": 0.143406942486763, "learning_rate": 5.302060045750986e-05, "loss": 7.0285, "step": 10330 }, { "epoch": 0.021185372005044136, "grad_norm": 0.1499478667974472, "learning_rate": 5.300454817397522e-05, "loss": 7.0262, "step": 10340 }, { "epoch": 0.021437578814627996, "grad_norm": 0.15159060060977936, "learning_rate": 5.298847988781058e-05, "loss": 7.0258, "step": 10350 }, { "epoch": 0.021689785624211855, "grad_norm": 0.13961094617843628, "learning_rate": 5.297239561019352e-05, "loss": 7.0248, "step": 10360 }, { "epoch": 0.02194199243379571, "grad_norm": 0.16692326962947845, "learning_rate": 5.29562953523127e-05, "loss": 7.036, "step": 10370 }, { "epoch": 0.02219419924337957, "grad_norm": 0.15379631519317627, "learning_rate": 5.2940179125367945e-05, "loss": 7.042, "step": 10380 }, { "epoch": 0.02244640605296343, "grad_norm": 0.17324195802211761, "learning_rate": 5.2924046940570175e-05, "loss": 7.0415, "step": 10390 }, { "epoch": 0.02269861286254729, "grad_norm": 0.1513446420431137, "learning_rate": 5.2907898809141386e-05, "loss": 7.04, "step": 10400 }, { "epoch": 0.022950819672131147, "grad_norm": 0.14063072204589844, "learning_rate": 5.289173474231469e-05, "loss": 7.0259, "step": 10410 }, { "epoch": 0.023203026481715006, "grad_norm": 0.16069568693637848, "learning_rate": 5.287555475133429e-05, "loss": 7.0347, "step": 10420 }, { "epoch": 0.023455233291298866, "grad_norm": 0.14112447202205658, "learning_rate": 5.285935884745546e-05, "loss": 7.036, "step": 10430 }, { "epoch": 0.023707440100882726, "grad_norm": 0.13967114686965942, "learning_rate": 5.284314704194453e-05, "loss": 7.0173, "step": 10440 }, { "epoch": 0.02395964691046658, "grad_norm": 0.14839349687099457, "learning_rate": 5.282691934607889e-05, "loss": 7.0223, "step": 10450 }, { "epoch": 0.02421185372005044, "grad_norm": 0.17180663347244263, "learning_rate": 5.281067577114702e-05, "loss": 7.0396, "step": 10460 }, { "epoch": 0.0244640605296343, "grad_norm": 0.16705630719661713, "learning_rate": 5.2794416328448404e-05, "loss": 7.0402, "step": 10470 }, { "epoch": 0.02471626733921816, "grad_norm": 0.16730934381484985, "learning_rate": 5.2778141029293585e-05, "loss": 7.0379, "step": 10480 }, { "epoch": 0.024968474148802017, "grad_norm": 0.14851653575897217, "learning_rate": 5.276184988500415e-05, "loss": 7.0325, "step": 10490 }, { "epoch": 0.025220680958385876, "grad_norm": 0.1435232311487198, "learning_rate": 5.274554290691266e-05, "loss": 7.0335, "step": 10500 }, { "epoch": 0.025472887767969736, "grad_norm": 0.16143979132175446, "learning_rate": 5.2729220106362745e-05, "loss": 7.0319, "step": 10510 }, { "epoch": 0.025725094577553596, "grad_norm": 0.1679828017950058, "learning_rate": 5.2712881494709006e-05, "loss": 7.0442, "step": 10520 }, { "epoch": 0.025977301387137452, "grad_norm": 0.16132695972919464, "learning_rate": 5.2696527083317054e-05, "loss": 7.0435, "step": 10530 }, { "epoch": 0.02622950819672131, "grad_norm": 0.16305659711360931, "learning_rate": 5.26801568835635e-05, "loss": 7.0399, "step": 10540 }, { "epoch": 0.02648171500630517, "grad_norm": 0.16418838500976562, "learning_rate": 5.266377090683592e-05, "loss": 7.0378, "step": 10550 }, { "epoch": 0.02673392181588903, "grad_norm": 0.16247306764125824, "learning_rate": 5.264736916453288e-05, "loss": 7.0395, "step": 10560 }, { "epoch": 0.026986128625472887, "grad_norm": 0.1531001180410385, "learning_rate": 5.263095166806391e-05, "loss": 7.0393, "step": 10570 }, { "epoch": 0.027238335435056747, "grad_norm": 0.16490842401981354, "learning_rate": 5.261451842884948e-05, "loss": 7.0367, "step": 10580 }, { "epoch": 0.027490542244640606, "grad_norm": 0.16874943673610687, "learning_rate": 5.2598069458321046e-05, "loss": 7.0425, "step": 10590 }, { "epoch": 0.027742749054224466, "grad_norm": 0.1460392326116562, "learning_rate": 5.258160476792098e-05, "loss": 7.0433, "step": 10600 }, { "epoch": 0.027994955863808322, "grad_norm": 0.15467850863933563, "learning_rate": 5.25651243691026e-05, "loss": 7.0343, "step": 10610 }, { "epoch": 0.02824716267339218, "grad_norm": 0.1480843424797058, "learning_rate": 5.254862827333014e-05, "loss": 7.0367, "step": 10620 }, { "epoch": 0.02849936948297604, "grad_norm": 0.13851363956928253, "learning_rate": 5.253211649207877e-05, "loss": 7.0269, "step": 10630 }, { "epoch": 0.0287515762925599, "grad_norm": 0.16065824031829834, "learning_rate": 5.251558903683457e-05, "loss": 7.028, "step": 10640 }, { "epoch": 0.029003783102143757, "grad_norm": 0.15594901144504547, "learning_rate": 5.249904591909451e-05, "loss": 7.0376, "step": 10650 }, { "epoch": 0.029255989911727617, "grad_norm": 0.16041845083236694, "learning_rate": 5.248248715036645e-05, "loss": 7.0366, "step": 10660 }, { "epoch": 0.029508196721311476, "grad_norm": 0.1560821682214737, "learning_rate": 5.2465912742169165e-05, "loss": 7.035, "step": 10670 }, { "epoch": 0.029760403530895336, "grad_norm": 0.14359967410564423, "learning_rate": 5.2449322706032285e-05, "loss": 7.0296, "step": 10680 }, { "epoch": 0.030012610340479192, "grad_norm": 0.15372496843338013, "learning_rate": 5.243271705349633e-05, "loss": 7.0281, "step": 10690 }, { "epoch": 0.03026481715006305, "grad_norm": 0.15956617891788483, "learning_rate": 5.241609579611266e-05, "loss": 7.0342, "step": 10700 }, { "epoch": 0.03051702395964691, "grad_norm": 0.1651666760444641, "learning_rate": 5.239945894544351e-05, "loss": 7.0334, "step": 10710 }, { "epoch": 0.03076923076923077, "grad_norm": 0.15578164160251617, "learning_rate": 5.238280651306195e-05, "loss": 7.035, "step": 10720 }, { "epoch": 0.031021437578814627, "grad_norm": 0.16434051096439362, "learning_rate": 5.23661385105519e-05, "loss": 7.0375, "step": 10730 }, { "epoch": 0.03127364438839849, "grad_norm": 0.16233299672603607, "learning_rate": 5.234945494950811e-05, "loss": 7.0432, "step": 10740 }, { "epoch": 0.031525851197982346, "grad_norm": 0.1509607434272766, "learning_rate": 5.2332755841536135e-05, "loss": 7.0369, "step": 10750 }, { "epoch": 0.0317780580075662, "grad_norm": 0.1525591015815735, "learning_rate": 5.231604119825236e-05, "loss": 7.0307, "step": 10760 }, { "epoch": 0.032030264817150066, "grad_norm": 0.1575518697500229, "learning_rate": 5.229931103128399e-05, "loss": 7.0365, "step": 10770 }, { "epoch": 0.03228247162673392, "grad_norm": 0.15320472419261932, "learning_rate": 5.2282565352269e-05, "loss": 7.0356, "step": 10780 }, { "epoch": 0.03253467843631778, "grad_norm": 0.16120173037052155, "learning_rate": 5.2265804172856176e-05, "loss": 7.0345, "step": 10790 }, { "epoch": 0.03278688524590164, "grad_norm": 0.16833053529262543, "learning_rate": 5.224902750470507e-05, "loss": 7.0371, "step": 10800 }, { "epoch": 0.0330390920554855, "grad_norm": 0.14730358123779297, "learning_rate": 5.2232235359486035e-05, "loss": 7.0338, "step": 10810 }, { "epoch": 0.03329129886506936, "grad_norm": 0.1527813822031021, "learning_rate": 5.2215427748880154e-05, "loss": 7.0343, "step": 10820 }, { "epoch": 0.033543505674653216, "grad_norm": 0.1579018384218216, "learning_rate": 5.2198604684579295e-05, "loss": 7.0374, "step": 10830 }, { "epoch": 0.03379571248423707, "grad_norm": 0.15046252310276031, "learning_rate": 5.218176617828608e-05, "loss": 7.0379, "step": 10840 }, { "epoch": 0.034047919293820936, "grad_norm": 0.16333068907260895, "learning_rate": 5.216491224171385e-05, "loss": 7.0314, "step": 10850 }, { "epoch": 0.03430012610340479, "grad_norm": 0.1617131233215332, "learning_rate": 5.214804288658669e-05, "loss": 7.0388, "step": 10860 }, { "epoch": 0.03455233291298865, "grad_norm": 0.15643340349197388, "learning_rate": 5.2131158124639426e-05, "loss": 7.0391, "step": 10870 }, { "epoch": 0.03480453972257251, "grad_norm": 0.14230549335479736, "learning_rate": 5.2114257967617566e-05, "loss": 7.0381, "step": 10880 }, { "epoch": 0.03505674653215637, "grad_norm": 0.1412665694952011, "learning_rate": 5.209734242727737e-05, "loss": 7.0297, "step": 10890 }, { "epoch": 0.03530895334174023, "grad_norm": 0.14541174471378326, "learning_rate": 5.208041151538576e-05, "loss": 7.0247, "step": 10900 }, { "epoch": 0.03556116015132409, "grad_norm": 0.1680935025215149, "learning_rate": 5.206346524372038e-05, "loss": 7.0401, "step": 10910 }, { "epoch": 0.03581336696090794, "grad_norm": 0.16246913373470306, "learning_rate": 5.2046503624069554e-05, "loss": 7.0414, "step": 10920 }, { "epoch": 0.036065573770491806, "grad_norm": 0.14144332706928253, "learning_rate": 5.202952666823225e-05, "loss": 7.0372, "step": 10930 }, { "epoch": 0.03631778058007566, "grad_norm": 0.16186295449733734, "learning_rate": 5.201253438801816e-05, "loss": 7.0317, "step": 10940 }, { "epoch": 0.03656998738965952, "grad_norm": 0.16343307495117188, "learning_rate": 5.199552679524758e-05, "loss": 7.0434, "step": 10950 }, { "epoch": 0.03682219419924338, "grad_norm": 0.15141761302947998, "learning_rate": 5.1978503901751505e-05, "loss": 7.035, "step": 10960 }, { "epoch": 0.03707440100882724, "grad_norm": 0.1524709314107895, "learning_rate": 5.1961465719371534e-05, "loss": 7.0346, "step": 10970 }, { "epoch": 0.0373266078184111, "grad_norm": 0.15960727632045746, "learning_rate": 5.1944412259959944e-05, "loss": 7.0382, "step": 10980 }, { "epoch": 0.03757881462799496, "grad_norm": 0.1581730991601944, "learning_rate": 5.19273435353796e-05, "loss": 7.0461, "step": 10990 }, { "epoch": 0.03783102143757881, "grad_norm": 0.1601705402135849, "learning_rate": 5.1910259557504e-05, "loss": 7.0432, "step": 11000 }, { "epoch": 0.038083228247162676, "grad_norm": 0.14598025381565094, "learning_rate": 5.189316033821726e-05, "loss": 7.0292, "step": 11010 }, { "epoch": 0.03833543505674653, "grad_norm": 0.14840783178806305, "learning_rate": 5.187604588941408e-05, "loss": 7.03, "step": 11020 }, { "epoch": 0.03858764186633039, "grad_norm": 0.14350850880146027, "learning_rate": 5.185891622299978e-05, "loss": 7.0276, "step": 11030 }, { "epoch": 0.03883984867591425, "grad_norm": 0.15016032755374908, "learning_rate": 5.184177135089025e-05, "loss": 7.0348, "step": 11040 }, { "epoch": 0.03909205548549811, "grad_norm": 0.14871519804000854, "learning_rate": 5.182461128501196e-05, "loss": 7.0291, "step": 11050 }, { "epoch": 0.03934426229508197, "grad_norm": 0.1500849723815918, "learning_rate": 5.1807436037301935e-05, "loss": 7.0305, "step": 11060 }, { "epoch": 0.03959646910466583, "grad_norm": 0.14324194192886353, "learning_rate": 5.17902456197078e-05, "loss": 7.0335, "step": 11070 }, { "epoch": 0.03984867591424968, "grad_norm": 0.14066678285598755, "learning_rate": 5.177304004418768e-05, "loss": 7.0236, "step": 11080 }, { "epoch": 0.040100882723833546, "grad_norm": 0.15000833570957184, "learning_rate": 5.175581932271029e-05, "loss": 7.0252, "step": 11090 }, { "epoch": 0.0403530895334174, "grad_norm": 0.16123367846012115, "learning_rate": 5.173858346725486e-05, "loss": 7.0418, "step": 11100 }, { "epoch": 0.04060529634300126, "grad_norm": 0.15601353347301483, "learning_rate": 5.172133248981115e-05, "loss": 7.0348, "step": 11110 }, { "epoch": 0.04085750315258512, "grad_norm": 0.15283720195293427, "learning_rate": 5.170406640237943e-05, "loss": 7.038, "step": 11120 }, { "epoch": 0.04110970996216898, "grad_norm": 0.17505806684494019, "learning_rate": 5.16867852169705e-05, "loss": 7.0384, "step": 11130 }, { "epoch": 0.04136191677175284, "grad_norm": 0.134765163064003, "learning_rate": 5.166948894560565e-05, "loss": 7.0335, "step": 11140 }, { "epoch": 0.0416141235813367, "grad_norm": 0.15557481348514557, "learning_rate": 5.165217760031667e-05, "loss": 7.0332, "step": 11150 }, { "epoch": 0.04186633039092055, "grad_norm": 0.1607121378183365, "learning_rate": 5.163485119314582e-05, "loss": 7.04, "step": 11160 }, { "epoch": 0.042118537200504416, "grad_norm": 0.15980176627635956, "learning_rate": 5.161750973614588e-05, "loss": 7.0404, "step": 11170 }, { "epoch": 0.04237074401008827, "grad_norm": 0.15535710752010345, "learning_rate": 5.160015324138005e-05, "loss": 7.0312, "step": 11180 }, { "epoch": 0.04262295081967213, "grad_norm": 0.13689051568508148, "learning_rate": 5.1582781720922005e-05, "loss": 7.0242, "step": 11190 }, { "epoch": 0.04287515762925599, "grad_norm": 0.13505877554416656, "learning_rate": 5.156539518685588e-05, "loss": 7.0229, "step": 11200 }, { "epoch": 0.04312736443883985, "grad_norm": 0.14404942095279694, "learning_rate": 5.154799365127626e-05, "loss": 7.019, "step": 11210 }, { "epoch": 0.04337957124842371, "grad_norm": 0.1420573592185974, "learning_rate": 5.153057712628816e-05, "loss": 7.0278, "step": 11220 }, { "epoch": 0.04363177805800757, "grad_norm": 0.1518537849187851, "learning_rate": 5.151314562400701e-05, "loss": 7.0378, "step": 11230 }, { "epoch": 0.04388398486759142, "grad_norm": 0.15453766286373138, "learning_rate": 5.149569915655867e-05, "loss": 7.0365, "step": 11240 }, { "epoch": 0.044136191677175286, "grad_norm": 0.16342812776565552, "learning_rate": 5.1478237736079416e-05, "loss": 7.04, "step": 11250 }, { "epoch": 0.04438839848675914, "grad_norm": 0.15029288828372955, "learning_rate": 5.14607613747159e-05, "loss": 7.0393, "step": 11260 }, { "epoch": 0.044640605296343, "grad_norm": 0.14080072939395905, "learning_rate": 5.1443270084625206e-05, "loss": 7.0272, "step": 11270 }, { "epoch": 0.04489281210592686, "grad_norm": 0.14435410499572754, "learning_rate": 5.142576387797476e-05, "loss": 7.0215, "step": 11280 }, { "epoch": 0.04514501891551072, "grad_norm": 0.15954451262950897, "learning_rate": 5.1408242766942406e-05, "loss": 7.0391, "step": 11290 }, { "epoch": 0.04539722572509458, "grad_norm": 0.16607175767421722, "learning_rate": 5.1390706763716326e-05, "loss": 7.0392, "step": 11300 }, { "epoch": 0.04564943253467844, "grad_norm": 0.15359872579574585, "learning_rate": 5.1373155880495074e-05, "loss": 7.0377, "step": 11310 }, { "epoch": 0.04590163934426229, "grad_norm": 0.14863024652004242, "learning_rate": 5.135559012948755e-05, "loss": 7.0335, "step": 11320 }, { "epoch": 0.046153846153846156, "grad_norm": 0.1428999900817871, "learning_rate": 5.1338009522913005e-05, "loss": 7.0291, "step": 11330 }, { "epoch": 0.04640605296343001, "grad_norm": 0.15701113641262054, "learning_rate": 5.1320414073001015e-05, "loss": 7.0333, "step": 11340 }, { "epoch": 0.04665825977301387, "grad_norm": 0.16276191174983978, "learning_rate": 5.130280379199149e-05, "loss": 7.0416, "step": 11350 }, { "epoch": 0.04691046658259773, "grad_norm": 0.14296533167362213, "learning_rate": 5.1285178692134644e-05, "loss": 7.0375, "step": 11360 }, { "epoch": 0.04716267339218159, "grad_norm": 0.15207801759243011, "learning_rate": 5.1267538785691005e-05, "loss": 7.0348, "step": 11370 }, { "epoch": 0.04741488020176545, "grad_norm": 0.15739642083644867, "learning_rate": 5.1249884084931406e-05, "loss": 7.0382, "step": 11380 }, { "epoch": 0.04766708701134931, "grad_norm": 0.1445837765932083, "learning_rate": 5.123221460213698e-05, "loss": 7.0272, "step": 11390 }, { "epoch": 0.04791929382093316, "grad_norm": 0.1555764079093933, "learning_rate": 5.121453034959911e-05, "loss": 7.0357, "step": 11400 }, { "epoch": 0.048171500630517027, "grad_norm": 0.12416047602891922, "learning_rate": 5.119683133961949e-05, "loss": 7.0203, "step": 11410 }, { "epoch": 0.04842370744010088, "grad_norm": 0.14021699130535126, "learning_rate": 5.117911758451005e-05, "loss": 7.0302, "step": 11420 }, { "epoch": 0.04867591424968474, "grad_norm": 0.135954812169075, "learning_rate": 5.1161389096593006e-05, "loss": 7.0214, "step": 11430 }, { "epoch": 0.0489281210592686, "grad_norm": 0.14897799491882324, "learning_rate": 5.114364588820081e-05, "loss": 7.0266, "step": 11440 }, { "epoch": 0.04918032786885246, "grad_norm": 0.13792872428894043, "learning_rate": 5.112588797167614e-05, "loss": 7.0198, "step": 11450 }, { "epoch": 0.04943253467843632, "grad_norm": 0.14467795193195343, "learning_rate": 5.110811535937191e-05, "loss": 7.0206, "step": 11460 }, { "epoch": 0.04968474148802018, "grad_norm": 0.145864799618721, "learning_rate": 5.1090328063651285e-05, "loss": 7.0289, "step": 11470 }, { "epoch": 0.049936948297604034, "grad_norm": 0.1414605975151062, "learning_rate": 5.10725260968876e-05, "loss": 7.0284, "step": 11480 }, { "epoch": 0.0501891551071879, "grad_norm": 0.11939103156328201, "learning_rate": 5.105470947146443e-05, "loss": 7.0217, "step": 11490 }, { "epoch": 0.05044136191677175, "grad_norm": 0.13066263496875763, "learning_rate": 5.1036878199775545e-05, "loss": 7.014, "step": 11500 }, { "epoch": 0.05069356872635561, "grad_norm": 0.12157106399536133, "learning_rate": 5.1019032294224885e-05, "loss": 7.0102, "step": 11510 }, { "epoch": 0.05094577553593947, "grad_norm": 0.13690294325351715, "learning_rate": 5.1001171767226577e-05, "loss": 7.0189, "step": 11520 }, { "epoch": 0.05119798234552333, "grad_norm": 0.12369751185178757, "learning_rate": 5.098329663120492e-05, "loss": 7.0148, "step": 11530 }, { "epoch": 0.05145018915510719, "grad_norm": 0.1365123838186264, "learning_rate": 5.0965406898594385e-05, "loss": 7.0162, "step": 11540 }, { "epoch": 0.05170239596469105, "grad_norm": 0.1578850895166397, "learning_rate": 5.094750258183958e-05, "loss": 7.038, "step": 11550 }, { "epoch": 0.051954602774274904, "grad_norm": 0.15742827951908112, "learning_rate": 5.092958369339527e-05, "loss": 7.0381, "step": 11560 }, { "epoch": 0.05220680958385877, "grad_norm": 0.14692486822605133, "learning_rate": 5.091165024572635e-05, "loss": 7.0344, "step": 11570 }, { "epoch": 0.05245901639344262, "grad_norm": 0.1409207135438919, "learning_rate": 5.089370225130785e-05, "loss": 7.0276, "step": 11580 }, { "epoch": 0.05271122320302648, "grad_norm": 0.1561894565820694, "learning_rate": 5.0875739722624913e-05, "loss": 7.0321, "step": 11590 }, { "epoch": 0.05296343001261034, "grad_norm": 0.15514469146728516, "learning_rate": 5.085776267217279e-05, "loss": 7.0351, "step": 11600 }, { "epoch": 0.0532156368221942, "grad_norm": 0.14834286272525787, "learning_rate": 5.083977111245684e-05, "loss": 7.0325, "step": 11610 }, { "epoch": 0.05346784363177806, "grad_norm": 0.14885829389095306, "learning_rate": 5.082176505599252e-05, "loss": 7.0343, "step": 11620 }, { "epoch": 0.05372005044136192, "grad_norm": 0.15368331968784332, "learning_rate": 5.080374451530536e-05, "loss": 7.0387, "step": 11630 }, { "epoch": 0.053972257250945774, "grad_norm": 0.1435731053352356, "learning_rate": 5.078570950293096e-05, "loss": 7.0328, "step": 11640 }, { "epoch": 0.05422446406052964, "grad_norm": 0.13712084293365479, "learning_rate": 5.076766003141501e-05, "loss": 7.0194, "step": 11650 }, { "epoch": 0.05447667087011349, "grad_norm": 0.1415771096944809, "learning_rate": 5.074959611331323e-05, "loss": 7.0173, "step": 11660 }, { "epoch": 0.05472887767969735, "grad_norm": 0.15046890079975128, "learning_rate": 5.073151776119143e-05, "loss": 7.0178, "step": 11670 }, { "epoch": 0.05498108448928121, "grad_norm": 0.15422260761260986, "learning_rate": 5.0713424987625414e-05, "loss": 7.0324, "step": 11680 }, { "epoch": 0.05523329129886507, "grad_norm": 0.15238641202449799, "learning_rate": 5.0695317805201055e-05, "loss": 7.0327, "step": 11690 }, { "epoch": 0.05548549810844893, "grad_norm": 0.1359781175851822, "learning_rate": 5.067719622651422e-05, "loss": 7.0259, "step": 11700 }, { "epoch": 0.05573770491803279, "grad_norm": 0.14677304029464722, "learning_rate": 5.065906026417081e-05, "loss": 7.0259, "step": 11710 }, { "epoch": 0.055989911727616644, "grad_norm": 0.1425226926803589, "learning_rate": 5.0640909930786714e-05, "loss": 7.0207, "step": 11720 }, { "epoch": 0.05624211853720051, "grad_norm": 0.1463133543729782, "learning_rate": 5.062274523898785e-05, "loss": 7.0269, "step": 11730 }, { "epoch": 0.05649432534678436, "grad_norm": 0.14427851140499115, "learning_rate": 5.060456620141009e-05, "loss": 7.0249, "step": 11740 }, { "epoch": 0.05674653215636822, "grad_norm": 0.14741943776607513, "learning_rate": 5.05863728306993e-05, "loss": 7.0331, "step": 11750 }, { "epoch": 0.05699873896595208, "grad_norm": 0.14652401208877563, "learning_rate": 5.056816513951132e-05, "loss": 7.0354, "step": 11760 }, { "epoch": 0.05725094577553594, "grad_norm": 0.15632258355617523, "learning_rate": 5.054994314051195e-05, "loss": 7.0266, "step": 11770 }, { "epoch": 0.0575031525851198, "grad_norm": 0.1502395123243332, "learning_rate": 5.0531706846376936e-05, "loss": 7.0376, "step": 11780 }, { "epoch": 0.05775535939470366, "grad_norm": 0.15964196622371674, "learning_rate": 5.0513456269791964e-05, "loss": 7.0359, "step": 11790 }, { "epoch": 0.058007566204287514, "grad_norm": 0.1488771289587021, "learning_rate": 5.049519142345268e-05, "loss": 7.0318, "step": 11800 }, { "epoch": 0.05825977301387138, "grad_norm": 0.14216186106204987, "learning_rate": 5.047691232006464e-05, "loss": 7.0285, "step": 11810 }, { "epoch": 0.05851197982345523, "grad_norm": 0.12983928620815277, "learning_rate": 5.045861897234329e-05, "loss": 7.0234, "step": 11820 }, { "epoch": 0.05876418663303909, "grad_norm": 0.15469050407409668, "learning_rate": 5.044031139301404e-05, "loss": 7.028, "step": 11830 }, { "epoch": 0.05901639344262295, "grad_norm": 0.1563747525215149, "learning_rate": 5.042198959481217e-05, "loss": 7.0405, "step": 11840 }, { "epoch": 0.05926860025220681, "grad_norm": 0.15492437779903412, "learning_rate": 5.040365359048284e-05, "loss": 7.0401, "step": 11850 }, { "epoch": 0.05952080706179067, "grad_norm": 0.14895160496234894, "learning_rate": 5.0385303392781126e-05, "loss": 7.0357, "step": 11860 }, { "epoch": 0.05977301387137453, "grad_norm": 0.13669539988040924, "learning_rate": 5.036693901447194e-05, "loss": 7.0343, "step": 11870 }, { "epoch": 0.060025220680958384, "grad_norm": 0.11869065463542938, "learning_rate": 5.034856046833007e-05, "loss": 7.0162, "step": 11880 }, { "epoch": 0.06027742749054225, "grad_norm": 0.13329549133777618, "learning_rate": 5.033016776714019e-05, "loss": 7.0238, "step": 11890 }, { "epoch": 0.0605296343001261, "grad_norm": 0.1501324325799942, "learning_rate": 5.0311760923696767e-05, "loss": 7.0288, "step": 11900 }, { "epoch": 0.06078184110970996, "grad_norm": 0.10855961591005325, "learning_rate": 5.0293339950804155e-05, "loss": 7.0144, "step": 11910 }, { "epoch": 0.06103404791929382, "grad_norm": 0.12174750119447708, "learning_rate": 5.027490486127651e-05, "loss": 7.0077, "step": 11920 }, { "epoch": 0.06128625472887768, "grad_norm": 0.13936083018779755, "learning_rate": 5.025645566793782e-05, "loss": 7.0139, "step": 11930 }, { "epoch": 0.06153846153846154, "grad_norm": 0.15022863447666168, "learning_rate": 5.0237992383621866e-05, "loss": 7.0258, "step": 11940 }, { "epoch": 0.0617906683480454, "grad_norm": 0.14395952224731445, "learning_rate": 5.0219515021172256e-05, "loss": 7.025, "step": 11950 }, { "epoch": 0.062042875157629254, "grad_norm": 0.1315990686416626, "learning_rate": 5.020102359344237e-05, "loss": 7.0295, "step": 11960 }, { "epoch": 0.06229508196721312, "grad_norm": 0.1273866444826126, "learning_rate": 5.018251811329538e-05, "loss": 7.0189, "step": 11970 }, { "epoch": 0.06254728877679698, "grad_norm": 0.1142813041806221, "learning_rate": 5.016399859360423e-05, "loss": 7.0158, "step": 11980 }, { "epoch": 0.06279949558638083, "grad_norm": 0.14166325330734253, "learning_rate": 5.014546504725165e-05, "loss": 7.0156, "step": 11990 }, { "epoch": 0.06305170239596469, "grad_norm": 0.1395639181137085, "learning_rate": 5.01269174871301e-05, "loss": 7.0184, "step": 12000 }, { "epoch": 0.06330390920554856, "grad_norm": 0.14077448844909668, "learning_rate": 5.01083559261418e-05, "loss": 7.0254, "step": 12010 }, { "epoch": 0.0635561160151324, "grad_norm": 0.13380302488803864, "learning_rate": 5.008978037719872e-05, "loss": 7.0242, "step": 12020 }, { "epoch": 0.06380832282471627, "grad_norm": 0.14080525934696198, "learning_rate": 5.007119085322254e-05, "loss": 7.021, "step": 12030 }, { "epoch": 0.06406052963430013, "grad_norm": 0.15185093879699707, "learning_rate": 5.005258736714467e-05, "loss": 7.0256, "step": 12040 }, { "epoch": 0.06431273644388398, "grad_norm": 0.1479945331811905, "learning_rate": 5.003396993190623e-05, "loss": 7.0316, "step": 12050 }, { "epoch": 0.06456494325346784, "grad_norm": 0.13748492300510406, "learning_rate": 5.001533856045807e-05, "loss": 7.0344, "step": 12060 }, { "epoch": 0.0648171500630517, "grad_norm": 0.14999014139175415, "learning_rate": 4.999669326576068e-05, "loss": 7.0188, "step": 12070 }, { "epoch": 0.06506935687263556, "grad_norm": 0.14083826541900635, "learning_rate": 4.997803406078429e-05, "loss": 7.029, "step": 12080 }, { "epoch": 0.06532156368221942, "grad_norm": 0.13417960703372955, "learning_rate": 4.995936095850878e-05, "loss": 7.023, "step": 12090 }, { "epoch": 0.06557377049180328, "grad_norm": 0.13966858386993408, "learning_rate": 4.994067397192369e-05, "loss": 7.0192, "step": 12100 }, { "epoch": 0.06582597730138713, "grad_norm": 0.1372358202934265, "learning_rate": 4.992197311402824e-05, "loss": 7.023, "step": 12110 }, { "epoch": 0.066078184110971, "grad_norm": 0.12524057924747467, "learning_rate": 4.990325839783128e-05, "loss": 7.0143, "step": 12120 }, { "epoch": 0.06633039092055486, "grad_norm": 0.13278937339782715, "learning_rate": 4.9884529836351326e-05, "loss": 7.0199, "step": 12130 }, { "epoch": 0.06658259773013872, "grad_norm": 0.1415076106786728, "learning_rate": 4.9865787442616474e-05, "loss": 7.0288, "step": 12140 }, { "epoch": 0.06683480453972257, "grad_norm": 0.14703570306301117, "learning_rate": 4.984703122966452e-05, "loss": 7.0233, "step": 12150 }, { "epoch": 0.06708701134930643, "grad_norm": 0.1404167264699936, "learning_rate": 4.9828261210542784e-05, "loss": 7.0317, "step": 12160 }, { "epoch": 0.0673392181588903, "grad_norm": 0.14195562899112701, "learning_rate": 4.980947739830827e-05, "loss": 7.0236, "step": 12170 }, { "epoch": 0.06759142496847415, "grad_norm": 0.13370227813720703, "learning_rate": 4.9790679806027533e-05, "loss": 7.025, "step": 12180 }, { "epoch": 0.06784363177805801, "grad_norm": 0.14206789433956146, "learning_rate": 4.9771868446776706e-05, "loss": 7.0289, "step": 12190 }, { "epoch": 0.06809583858764187, "grad_norm": 0.1586645245552063, "learning_rate": 4.9753043333641535e-05, "loss": 7.0351, "step": 12200 }, { "epoch": 0.06834804539722572, "grad_norm": 0.13752467930316925, "learning_rate": 4.9734204479717306e-05, "loss": 7.0301, "step": 12210 }, { "epoch": 0.06860025220680958, "grad_norm": 0.14216767251491547, "learning_rate": 4.971535189810886e-05, "loss": 7.0244, "step": 12220 }, { "epoch": 0.06885245901639345, "grad_norm": 0.14955401420593262, "learning_rate": 4.969648560193063e-05, "loss": 7.0257, "step": 12230 }, { "epoch": 0.0691046658259773, "grad_norm": 0.14611779153347015, "learning_rate": 4.967760560430653e-05, "loss": 7.0343, "step": 12240 }, { "epoch": 0.06935687263556116, "grad_norm": 0.14256863296031952, "learning_rate": 4.965871191837004e-05, "loss": 7.0298, "step": 12250 }, { "epoch": 0.06960907944514502, "grad_norm": 0.15051907300949097, "learning_rate": 4.963980455726415e-05, "loss": 7.0334, "step": 12260 }, { "epoch": 0.06986128625472887, "grad_norm": 0.1293012946844101, "learning_rate": 4.962088353414139e-05, "loss": 7.0255, "step": 12270 }, { "epoch": 0.07011349306431273, "grad_norm": 0.12513817846775055, "learning_rate": 4.960194886216375e-05, "loss": 7.0086, "step": 12280 }, { "epoch": 0.0703656998738966, "grad_norm": 0.12742896378040314, "learning_rate": 4.958300055450275e-05, "loss": 7.0112, "step": 12290 }, { "epoch": 0.07061790668348046, "grad_norm": 0.14425885677337646, "learning_rate": 4.956403862433936e-05, "loss": 7.0222, "step": 12300 }, { "epoch": 0.07087011349306431, "grad_norm": 0.14601625502109528, "learning_rate": 4.9545063084864075e-05, "loss": 7.0242, "step": 12310 }, { "epoch": 0.07112232030264817, "grad_norm": 0.1402921825647354, "learning_rate": 4.952607394927681e-05, "loss": 7.0299, "step": 12320 }, { "epoch": 0.07137452711223204, "grad_norm": 0.14559562504291534, "learning_rate": 4.950707123078697e-05, "loss": 7.0302, "step": 12330 }, { "epoch": 0.07162673392181589, "grad_norm": 0.12921951711177826, "learning_rate": 4.9488054942613384e-05, "loss": 7.0181, "step": 12340 }, { "epoch": 0.07187894073139975, "grad_norm": 0.12267972528934479, "learning_rate": 4.946902509798435e-05, "loss": 7.0191, "step": 12350 }, { "epoch": 0.07213114754098361, "grad_norm": 0.11541295051574707, "learning_rate": 4.9449981710137567e-05, "loss": 7.0065, "step": 12360 }, { "epoch": 0.07238335435056746, "grad_norm": 0.1278163641691208, "learning_rate": 4.943092479232017e-05, "loss": 7.0133, "step": 12370 }, { "epoch": 0.07263556116015132, "grad_norm": 0.13893121480941772, "learning_rate": 4.941185435778869e-05, "loss": 7.0301, "step": 12380 }, { "epoch": 0.07288776796973519, "grad_norm": 0.14087802171707153, "learning_rate": 4.93927704198091e-05, "loss": 7.0292, "step": 12390 }, { "epoch": 0.07313997477931904, "grad_norm": 0.14365176856517792, "learning_rate": 4.937367299165672e-05, "loss": 7.0252, "step": 12400 }, { "epoch": 0.0733921815889029, "grad_norm": 0.14902140200138092, "learning_rate": 4.935456208661628e-05, "loss": 7.0312, "step": 12410 }, { "epoch": 0.07364438839848676, "grad_norm": 0.13938818871974945, "learning_rate": 4.9335437717981884e-05, "loss": 7.0328, "step": 12420 }, { "epoch": 0.07389659520807061, "grad_norm": 0.13526605069637299, "learning_rate": 4.931629989905699e-05, "loss": 7.0287, "step": 12430 }, { "epoch": 0.07414880201765447, "grad_norm": 0.132283553481102, "learning_rate": 4.9297148643154425e-05, "loss": 7.0242, "step": 12440 }, { "epoch": 0.07440100882723834, "grad_norm": 0.11311656981706619, "learning_rate": 4.9277983963596355e-05, "loss": 7.0128, "step": 12450 }, { "epoch": 0.0746532156368222, "grad_norm": 0.13826364278793335, "learning_rate": 4.925880587371428e-05, "loss": 7.0142, "step": 12460 }, { "epoch": 0.07490542244640605, "grad_norm": 0.13670676946640015, "learning_rate": 4.923961438684903e-05, "loss": 7.0203, "step": 12470 }, { "epoch": 0.07515762925598991, "grad_norm": 0.14014624059200287, "learning_rate": 4.922040951635078e-05, "loss": 7.0254, "step": 12480 }, { "epoch": 0.07540983606557378, "grad_norm": 0.13013994693756104, "learning_rate": 4.920119127557897e-05, "loss": 7.019, "step": 12490 }, { "epoch": 0.07566204287515763, "grad_norm": 0.13806353509426117, "learning_rate": 4.9181959677902395e-05, "loss": 7.0244, "step": 12500 }, { "epoch": 0.07591424968474149, "grad_norm": 0.13144336640834808, "learning_rate": 4.916271473669909e-05, "loss": 7.0241, "step": 12510 }, { "epoch": 0.07616645649432535, "grad_norm": 0.13433575630187988, "learning_rate": 4.9143456465356385e-05, "loss": 7.0211, "step": 12520 }, { "epoch": 0.0764186633039092, "grad_norm": 0.1267358809709549, "learning_rate": 4.9124184877270915e-05, "loss": 7.0201, "step": 12530 }, { "epoch": 0.07667087011349306, "grad_norm": 0.1410062313079834, "learning_rate": 4.9104899985848544e-05, "loss": 7.0171, "step": 12540 }, { "epoch": 0.07692307692307693, "grad_norm": 0.12768687307834625, "learning_rate": 4.90856018045044e-05, "loss": 7.0157, "step": 12550 }, { "epoch": 0.07717528373266078, "grad_norm": 0.1377383917570114, "learning_rate": 4.906629034666285e-05, "loss": 7.0186, "step": 12560 }, { "epoch": 0.07742749054224464, "grad_norm": 0.11399853229522705, "learning_rate": 4.9046965625757513e-05, "loss": 7.023, "step": 12570 }, { "epoch": 0.0776796973518285, "grad_norm": 0.13261806964874268, "learning_rate": 4.902762765523123e-05, "loss": 7.0173, "step": 12580 }, { "epoch": 0.07793190416141235, "grad_norm": 0.13203908503055573, "learning_rate": 4.900827644853604e-05, "loss": 7.0189, "step": 12590 }, { "epoch": 0.07818411097099622, "grad_norm": 0.12563125789165497, "learning_rate": 4.8988912019133215e-05, "loss": 7.0155, "step": 12600 }, { "epoch": 0.07843631778058008, "grad_norm": 0.14987653493881226, "learning_rate": 4.89695343804932e-05, "loss": 7.022, "step": 12610 }, { "epoch": 0.07868852459016394, "grad_norm": 0.1524726003408432, "learning_rate": 4.895014354609564e-05, "loss": 7.0316, "step": 12620 }, { "epoch": 0.07894073139974779, "grad_norm": 0.1448589712381363, "learning_rate": 4.893073952942938e-05, "loss": 7.0325, "step": 12630 }, { "epoch": 0.07919293820933165, "grad_norm": 0.1356041580438614, "learning_rate": 4.8911322343992407e-05, "loss": 7.0214, "step": 12640 }, { "epoch": 0.07944514501891552, "grad_norm": 0.11139481514692307, "learning_rate": 4.889189200329188e-05, "loss": 7.019, "step": 12650 }, { "epoch": 0.07969735182849937, "grad_norm": 0.13088202476501465, "learning_rate": 4.887244852084409e-05, "loss": 7.0096, "step": 12660 }, { "epoch": 0.07994955863808323, "grad_norm": 0.13917101919651031, "learning_rate": 4.885299191017451e-05, "loss": 7.0272, "step": 12670 }, { "epoch": 0.08020176544766709, "grad_norm": 0.14080755412578583, "learning_rate": 4.88335221848177e-05, "loss": 7.0252, "step": 12680 }, { "epoch": 0.08045397225725094, "grad_norm": 0.1287941187620163, "learning_rate": 4.8814039358317384e-05, "loss": 7.0245, "step": 12690 }, { "epoch": 0.0807061790668348, "grad_norm": 0.13883496820926666, "learning_rate": 4.8794543444226374e-05, "loss": 7.0206, "step": 12700 }, { "epoch": 0.08095838587641867, "grad_norm": 0.10845118761062622, "learning_rate": 4.8775034456106594e-05, "loss": 7.0053, "step": 12710 }, { "epoch": 0.08121059268600252, "grad_norm": 0.12203620374202728, "learning_rate": 4.8755512407529056e-05, "loss": 7.0153, "step": 12720 }, { "epoch": 0.08146279949558638, "grad_norm": 0.13356278836727142, "learning_rate": 4.873597731207386e-05, "loss": 7.0137, "step": 12730 }, { "epoch": 0.08171500630517024, "grad_norm": 0.12551027536392212, "learning_rate": 4.8716429183330184e-05, "loss": 7.025, "step": 12740 }, { "epoch": 0.08196721311475409, "grad_norm": 0.13855217397212982, "learning_rate": 4.8696868034896284e-05, "loss": 7.0219, "step": 12750 }, { "epoch": 0.08221941992433796, "grad_norm": 0.14363788068294525, "learning_rate": 4.867729388037945e-05, "loss": 7.0321, "step": 12760 }, { "epoch": 0.08247162673392182, "grad_norm": 0.1500197798013687, "learning_rate": 4.865770673339604e-05, "loss": 7.034, "step": 12770 }, { "epoch": 0.08272383354350568, "grad_norm": 0.12682144343852997, "learning_rate": 4.8638106607571435e-05, "loss": 7.0274, "step": 12780 }, { "epoch": 0.08297604035308953, "grad_norm": 0.14339524507522583, "learning_rate": 4.861849351654005e-05, "loss": 7.0232, "step": 12790 }, { "epoch": 0.0832282471626734, "grad_norm": 0.139934703707695, "learning_rate": 4.8598867473945335e-05, "loss": 7.0257, "step": 12800 }, { "epoch": 0.08348045397225726, "grad_norm": 0.14305716753005981, "learning_rate": 4.85792284934397e-05, "loss": 7.0283, "step": 12810 }, { "epoch": 0.0837326607818411, "grad_norm": 0.14580202102661133, "learning_rate": 4.8559576588684615e-05, "loss": 7.0303, "step": 12820 }, { "epoch": 0.08398486759142497, "grad_norm": 0.14225731790065765, "learning_rate": 4.85399117733505e-05, "loss": 7.0289, "step": 12830 }, { "epoch": 0.08423707440100883, "grad_norm": 0.13295449316501617, "learning_rate": 4.85202340611168e-05, "loss": 7.0245, "step": 12840 }, { "epoch": 0.08448928121059268, "grad_norm": 0.134893536567688, "learning_rate": 4.850054346567185e-05, "loss": 7.0208, "step": 12850 }, { "epoch": 0.08474148802017654, "grad_norm": 0.12871254980564117, "learning_rate": 4.848084000071305e-05, "loss": 7.018, "step": 12860 }, { "epoch": 0.08499369482976041, "grad_norm": 0.12594933807849884, "learning_rate": 4.8461123679946664e-05, "loss": 7.0215, "step": 12870 }, { "epoch": 0.08524590163934426, "grad_norm": 0.10554085671901703, "learning_rate": 4.8441394517087956e-05, "loss": 7.0083, "step": 12880 }, { "epoch": 0.08549810844892812, "grad_norm": 0.11201979219913483, "learning_rate": 4.842165252586109e-05, "loss": 7.0012, "step": 12890 }, { "epoch": 0.08575031525851198, "grad_norm": 0.1294853538274765, "learning_rate": 4.840189771999918e-05, "loss": 7.0075, "step": 12900 }, { "epoch": 0.08600252206809583, "grad_norm": 0.14511358737945557, "learning_rate": 4.8382130113244233e-05, "loss": 7.0252, "step": 12910 }, { "epoch": 0.0862547288776797, "grad_norm": 0.13419951498508453, "learning_rate": 4.836234971934718e-05, "loss": 7.0223, "step": 12920 }, { "epoch": 0.08650693568726356, "grad_norm": 0.11297326534986496, "learning_rate": 4.834255655206782e-05, "loss": 7.0131, "step": 12930 }, { "epoch": 0.08675914249684742, "grad_norm": 0.1256144642829895, "learning_rate": 4.832275062517486e-05, "loss": 7.0059, "step": 12940 }, { "epoch": 0.08701134930643127, "grad_norm": 0.13714396953582764, "learning_rate": 4.830293195244587e-05, "loss": 7.0139, "step": 12950 }, { "epoch": 0.08726355611601513, "grad_norm": 0.13577991724014282, "learning_rate": 4.8283100547667304e-05, "loss": 7.0258, "step": 12960 }, { "epoch": 0.087515762925599, "grad_norm": 0.12636876106262207, "learning_rate": 4.8263256424634464e-05, "loss": 7.0229, "step": 12970 }, { "epoch": 0.08776796973518285, "grad_norm": 0.11677609384059906, "learning_rate": 4.824339959715148e-05, "loss": 7.013, "step": 12980 }, { "epoch": 0.08802017654476671, "grad_norm": 0.10930849611759186, "learning_rate": 4.822353007903135e-05, "loss": 7.0089, "step": 12990 }, { "epoch": 0.08827238335435057, "grad_norm": 0.12910854816436768, "learning_rate": 4.820364788409588e-05, "loss": 7.0105, "step": 13000 }, { "epoch": 0.08852459016393442, "grad_norm": 0.14956659078598022, "learning_rate": 4.818375302617571e-05, "loss": 7.0295, "step": 13010 }, { "epoch": 0.08877679697351828, "grad_norm": 0.1418309360742569, "learning_rate": 4.8163845519110265e-05, "loss": 7.0344, "step": 13020 }, { "epoch": 0.08902900378310215, "grad_norm": 0.1441190242767334, "learning_rate": 4.81439253767478e-05, "loss": 7.0281, "step": 13030 }, { "epoch": 0.089281210592686, "grad_norm": 0.1450979858636856, "learning_rate": 4.812399261294532e-05, "loss": 7.0298, "step": 13040 }, { "epoch": 0.08953341740226986, "grad_norm": 0.1480085700750351, "learning_rate": 4.8104047241568654e-05, "loss": 7.029, "step": 13050 }, { "epoch": 0.08978562421185372, "grad_norm": 0.12214242666959763, "learning_rate": 4.8084089276492383e-05, "loss": 7.0227, "step": 13060 }, { "epoch": 0.09003783102143757, "grad_norm": 0.11272437125444412, "learning_rate": 4.8064118731599814e-05, "loss": 7.0083, "step": 13070 }, { "epoch": 0.09029003783102144, "grad_norm": 0.11570771038532257, "learning_rate": 4.8044135620783066e-05, "loss": 7.007, "step": 13080 }, { "epoch": 0.0905422446406053, "grad_norm": 0.14072231948375702, "learning_rate": 4.802413995794297e-05, "loss": 7.0188, "step": 13090 }, { "epoch": 0.09079445145018916, "grad_norm": 0.12616895139217377, "learning_rate": 4.800413175698907e-05, "loss": 7.0209, "step": 13100 }, { "epoch": 0.09104665825977301, "grad_norm": 0.129970982670784, "learning_rate": 4.798411103183966e-05, "loss": 7.0247, "step": 13110 }, { "epoch": 0.09129886506935687, "grad_norm": 0.13040684163570404, "learning_rate": 4.796407779642174e-05, "loss": 7.0223, "step": 13120 }, { "epoch": 0.09155107187894074, "grad_norm": 0.13958249986171722, "learning_rate": 4.7944032064670994e-05, "loss": 7.0255, "step": 13130 }, { "epoch": 0.09180327868852459, "grad_norm": 0.11925708502531052, "learning_rate": 4.7923973850531834e-05, "loss": 7.0172, "step": 13140 }, { "epoch": 0.09205548549810845, "grad_norm": 0.12187664955854416, "learning_rate": 4.790390316795733e-05, "loss": 7.0135, "step": 13150 }, { "epoch": 0.09230769230769231, "grad_norm": 0.12377975881099701, "learning_rate": 4.788382003090921e-05, "loss": 7.0104, "step": 13160 }, { "epoch": 0.09255989911727616, "grad_norm": 0.13899922370910645, "learning_rate": 4.7863724453357925e-05, "loss": 7.0228, "step": 13170 }, { "epoch": 0.09281210592686003, "grad_norm": 0.13049064576625824, "learning_rate": 4.78436164492825e-05, "loss": 7.0196, "step": 13180 }, { "epoch": 0.09306431273644389, "grad_norm": 0.12285946309566498, "learning_rate": 4.782349603267066e-05, "loss": 7.0143, "step": 13190 }, { "epoch": 0.09331651954602774, "grad_norm": 0.12522397935390472, "learning_rate": 4.780336321751876e-05, "loss": 7.019, "step": 13200 }, { "epoch": 0.0935687263556116, "grad_norm": 0.1380007266998291, "learning_rate": 4.778321801783175e-05, "loss": 7.0171, "step": 13210 }, { "epoch": 0.09382093316519546, "grad_norm": 0.1342228502035141, "learning_rate": 4.776306044762323e-05, "loss": 7.0258, "step": 13220 }, { "epoch": 0.09407313997477931, "grad_norm": 0.11918234080076218, "learning_rate": 4.774289052091539e-05, "loss": 7.0128, "step": 13230 }, { "epoch": 0.09432534678436318, "grad_norm": 0.11027359962463379, "learning_rate": 4.7722708251738996e-05, "loss": 7.0082, "step": 13240 }, { "epoch": 0.09457755359394704, "grad_norm": 0.12176631391048431, "learning_rate": 4.770251365413345e-05, "loss": 7.0151, "step": 13250 }, { "epoch": 0.0948297604035309, "grad_norm": 0.13240616023540497, "learning_rate": 4.7682306742146666e-05, "loss": 7.0203, "step": 13260 }, { "epoch": 0.09508196721311475, "grad_norm": 0.12726910412311554, "learning_rate": 4.766208752983518e-05, "loss": 7.0195, "step": 13270 }, { "epoch": 0.09533417402269861, "grad_norm": 0.11675147712230682, "learning_rate": 4.7641856031264056e-05, "loss": 7.0145, "step": 13280 }, { "epoch": 0.09558638083228248, "grad_norm": 0.11503644287586212, "learning_rate": 4.7621612260506905e-05, "loss": 7.0155, "step": 13290 }, { "epoch": 0.09583858764186633, "grad_norm": 0.11184673756361008, "learning_rate": 4.760135623164589e-05, "loss": 7.0137, "step": 13300 }, { "epoch": 0.09609079445145019, "grad_norm": 0.12460320442914963, "learning_rate": 4.7581087958771686e-05, "loss": 7.0078, "step": 13310 }, { "epoch": 0.09634300126103405, "grad_norm": 0.12081223726272583, "learning_rate": 4.756080745598349e-05, "loss": 7.0121, "step": 13320 }, { "epoch": 0.0965952080706179, "grad_norm": 0.12213904410600662, "learning_rate": 4.7540514737389e-05, "loss": 7.0212, "step": 13330 }, { "epoch": 0.09684741488020177, "grad_norm": 0.12489718198776245, "learning_rate": 4.7520209817104425e-05, "loss": 7.0137, "step": 13340 }, { "epoch": 0.09709962168978563, "grad_norm": 0.12383078038692474, "learning_rate": 4.749989270925447e-05, "loss": 7.0083, "step": 13350 }, { "epoch": 0.09735182849936948, "grad_norm": 0.12724076211452484, "learning_rate": 4.747956342797228e-05, "loss": 7.0133, "step": 13360 }, { "epoch": 0.09760403530895334, "grad_norm": 0.12920905649662018, "learning_rate": 4.7459221987399505e-05, "loss": 7.0169, "step": 13370 }, { "epoch": 0.0978562421185372, "grad_norm": 0.13296271860599518, "learning_rate": 4.743886840168624e-05, "loss": 7.022, "step": 13380 }, { "epoch": 0.09810844892812105, "grad_norm": 0.13740451633930206, "learning_rate": 4.741850268499103e-05, "loss": 7.0212, "step": 13390 }, { "epoch": 0.09836065573770492, "grad_norm": 0.11711777746677399, "learning_rate": 4.739812485148085e-05, "loss": 7.0198, "step": 13400 }, { "epoch": 0.09861286254728878, "grad_norm": 0.11766014993190765, "learning_rate": 4.7377734915331116e-05, "loss": 7.0119, "step": 13410 }, { "epoch": 0.09886506935687264, "grad_norm": 0.11883403360843658, "learning_rate": 4.735733289072565e-05, "loss": 7.0149, "step": 13420 }, { "epoch": 0.09911727616645649, "grad_norm": 0.12281854450702667, "learning_rate": 4.7336918791856706e-05, "loss": 7.0107, "step": 13430 }, { "epoch": 0.09936948297604035, "grad_norm": 0.12307630479335785, "learning_rate": 4.731649263292492e-05, "loss": 7.0132, "step": 13440 }, { "epoch": 0.09962168978562422, "grad_norm": 0.13370613753795624, "learning_rate": 4.7296054428139305e-05, "loss": 7.0181, "step": 13450 }, { "epoch": 0.09987389659520807, "grad_norm": 0.14053653180599213, "learning_rate": 4.7275604191717284e-05, "loss": 7.0245, "step": 13460 }, { "epoch": 0.10012610340479193, "grad_norm": 0.1404641717672348, "learning_rate": 4.725514193788462e-05, "loss": 7.026, "step": 13470 }, { "epoch": 0.1003783102143758, "grad_norm": 0.13830697536468506, "learning_rate": 4.7234667680875454e-05, "loss": 7.0265, "step": 13480 }, { "epoch": 0.10063051702395964, "grad_norm": 0.13959752023220062, "learning_rate": 4.721418143493229e-05, "loss": 7.0271, "step": 13490 }, { "epoch": 0.1008827238335435, "grad_norm": 0.14307384192943573, "learning_rate": 4.719368321430593e-05, "loss": 7.0276, "step": 13500 }, { "epoch": 0.10113493064312737, "grad_norm": 0.1324107050895691, "learning_rate": 4.717317303325554e-05, "loss": 7.0295, "step": 13510 }, { "epoch": 0.10138713745271122, "grad_norm": 0.11833399534225464, "learning_rate": 4.715265090604859e-05, "loss": 7.0201, "step": 13520 }, { "epoch": 0.10163934426229508, "grad_norm": 0.11768066138029099, "learning_rate": 4.713211684696088e-05, "loss": 7.0181, "step": 13530 }, { "epoch": 0.10189155107187894, "grad_norm": 0.12946943938732147, "learning_rate": 4.711157087027649e-05, "loss": 7.0145, "step": 13540 }, { "epoch": 0.1021437578814628, "grad_norm": 0.1265946477651596, "learning_rate": 4.709101299028778e-05, "loss": 7.0205, "step": 13550 }, { "epoch": 0.10239596469104666, "grad_norm": 0.1177324652671814, "learning_rate": 4.707044322129544e-05, "loss": 7.0144, "step": 13560 }, { "epoch": 0.10264817150063052, "grad_norm": 0.11794262379407883, "learning_rate": 4.704986157760836e-05, "loss": 7.0069, "step": 13570 }, { "epoch": 0.10290037831021438, "grad_norm": 0.12420350313186646, "learning_rate": 4.702926807354376e-05, "loss": 7.0064, "step": 13580 }, { "epoch": 0.10315258511979823, "grad_norm": 0.13917531073093414, "learning_rate": 4.700866272342705e-05, "loss": 7.0155, "step": 13590 }, { "epoch": 0.1034047919293821, "grad_norm": 0.12509499490261078, "learning_rate": 4.6988045541591926e-05, "loss": 7.0186, "step": 13600 }, { "epoch": 0.10365699873896596, "grad_norm": 0.12273912876844406, "learning_rate": 4.696741654238029e-05, "loss": 7.0145, "step": 13610 }, { "epoch": 0.10390920554854981, "grad_norm": 0.12733866274356842, "learning_rate": 4.694677574014228e-05, "loss": 7.0201, "step": 13620 }, { "epoch": 0.10416141235813367, "grad_norm": 0.11801856756210327, "learning_rate": 4.692612314923622e-05, "loss": 7.0203, "step": 13630 }, { "epoch": 0.10441361916771753, "grad_norm": 0.1197308748960495, "learning_rate": 4.690545878402867e-05, "loss": 7.0083, "step": 13640 }, { "epoch": 0.10466582597730138, "grad_norm": 0.12088865041732788, "learning_rate": 4.6884782658894346e-05, "loss": 7.0112, "step": 13650 }, { "epoch": 0.10491803278688525, "grad_norm": 0.13499045372009277, "learning_rate": 4.686409478821617e-05, "loss": 7.0179, "step": 13660 }, { "epoch": 0.10517023959646911, "grad_norm": 0.11174165457487106, "learning_rate": 4.6843395186385215e-05, "loss": 7.0104, "step": 13670 }, { "epoch": 0.10542244640605296, "grad_norm": 0.11878480017185211, "learning_rate": 4.682268386780072e-05, "loss": 7.0099, "step": 13680 }, { "epoch": 0.10567465321563682, "grad_norm": 0.10584497451782227, "learning_rate": 4.6801960846870094e-05, "loss": 7.0089, "step": 13690 }, { "epoch": 0.10592686002522068, "grad_norm": 0.11242086440324783, "learning_rate": 4.678122613800886e-05, "loss": 7.006, "step": 13700 }, { "epoch": 0.10617906683480453, "grad_norm": 0.11694322526454926, "learning_rate": 4.6760479755640684e-05, "loss": 7.0008, "step": 13710 }, { "epoch": 0.1064312736443884, "grad_norm": 0.13276849687099457, "learning_rate": 4.6739721714197344e-05, "loss": 7.0144, "step": 13720 }, { "epoch": 0.10668348045397226, "grad_norm": 0.12587708234786987, "learning_rate": 4.671895202811874e-05, "loss": 7.012, "step": 13730 }, { "epoch": 0.10693568726355612, "grad_norm": 0.1296691745519638, "learning_rate": 4.6698170711852876e-05, "loss": 7.0201, "step": 13740 }, { "epoch": 0.10718789407313997, "grad_norm": 0.1344408541917801, "learning_rate": 4.6677377779855826e-05, "loss": 7.0249, "step": 13750 }, { "epoch": 0.10744010088272384, "grad_norm": 0.1295984387397766, "learning_rate": 4.665657324659176e-05, "loss": 7.0261, "step": 13760 }, { "epoch": 0.1076923076923077, "grad_norm": 0.11184023320674896, "learning_rate": 4.663575712653291e-05, "loss": 7.0127, "step": 13770 }, { "epoch": 0.10794451450189155, "grad_norm": 0.114151731133461, "learning_rate": 4.6614929434159584e-05, "loss": 7.0068, "step": 13780 }, { "epoch": 0.10819672131147541, "grad_norm": 0.12061171978712082, "learning_rate": 4.6594090183960114e-05, "loss": 7.0103, "step": 13790 }, { "epoch": 0.10844892812105927, "grad_norm": 0.11540930718183517, "learning_rate": 4.6573239390430896e-05, "loss": 7.0073, "step": 13800 }, { "epoch": 0.10870113493064312, "grad_norm": 0.12751227617263794, "learning_rate": 4.6552377068076325e-05, "loss": 7.0112, "step": 13810 }, { "epoch": 0.10895334174022699, "grad_norm": 0.12176912277936935, "learning_rate": 4.653150323140887e-05, "loss": 7.0148, "step": 13820 }, { "epoch": 0.10920554854981085, "grad_norm": 0.12961791455745697, "learning_rate": 4.6510617894948945e-05, "loss": 7.0174, "step": 13830 }, { "epoch": 0.1094577553593947, "grad_norm": 0.138155996799469, "learning_rate": 4.648972107322503e-05, "loss": 7.021, "step": 13840 }, { "epoch": 0.10970996216897856, "grad_norm": 0.13077591359615326, "learning_rate": 4.646881278077352e-05, "loss": 7.0206, "step": 13850 }, { "epoch": 0.10996216897856242, "grad_norm": 0.12179679423570633, "learning_rate": 4.644789303213885e-05, "loss": 7.0197, "step": 13860 }, { "epoch": 0.11021437578814627, "grad_norm": 0.1431414932012558, "learning_rate": 4.6426961841873406e-05, "loss": 7.0301, "step": 13870 }, { "epoch": 0.11046658259773014, "grad_norm": 0.12679535150527954, "learning_rate": 4.6406019224537526e-05, "loss": 7.0202, "step": 13880 }, { "epoch": 0.110718789407314, "grad_norm": 0.13005679845809937, "learning_rate": 4.6385065194699496e-05, "loss": 7.022, "step": 13890 }, { "epoch": 0.11097099621689786, "grad_norm": 0.13099455833435059, "learning_rate": 4.636409976693555e-05, "loss": 7.0265, "step": 13900 }, { "epoch": 0.11122320302648171, "grad_norm": 0.12408564984798431, "learning_rate": 4.634312295582985e-05, "loss": 7.0175, "step": 13910 }, { "epoch": 0.11147540983606558, "grad_norm": 0.1316472887992859, "learning_rate": 4.632213477597446e-05, "loss": 7.014, "step": 13920 }, { "epoch": 0.11172761664564944, "grad_norm": 0.12396996468305588, "learning_rate": 4.6301135241969393e-05, "loss": 7.0189, "step": 13930 }, { "epoch": 0.11197982345523329, "grad_norm": 0.11735033243894577, "learning_rate": 4.62801243684225e-05, "loss": 7.0159, "step": 13940 }, { "epoch": 0.11223203026481715, "grad_norm": 0.12963734567165375, "learning_rate": 4.6259102169949576e-05, "loss": 7.0147, "step": 13950 }, { "epoch": 0.11248423707440101, "grad_norm": 0.12004870921373367, "learning_rate": 4.623806866117427e-05, "loss": 7.0159, "step": 13960 }, { "epoch": 0.11273644388398486, "grad_norm": 0.13015472888946533, "learning_rate": 4.621702385672808e-05, "loss": 7.017, "step": 13970 }, { "epoch": 0.11298865069356873, "grad_norm": 0.12138278782367706, "learning_rate": 4.61959677712504e-05, "loss": 7.0167, "step": 13980 }, { "epoch": 0.11324085750315259, "grad_norm": 0.12545333802700043, "learning_rate": 4.617490041938845e-05, "loss": 7.0129, "step": 13990 }, { "epoch": 0.11349306431273644, "grad_norm": 0.12214142829179764, "learning_rate": 4.6153821815797295e-05, "loss": 7.0135, "step": 14000 }, { "epoch": 0.1137452711223203, "grad_norm": 0.11896806955337524, "learning_rate": 4.6132731975139814e-05, "loss": 7.0101, "step": 14010 }, { "epoch": 0.11399747793190416, "grad_norm": 0.11881226301193237, "learning_rate": 4.6111630912086714e-05, "loss": 7.0156, "step": 14020 }, { "epoch": 0.11424968474148801, "grad_norm": 0.12421417981386185, "learning_rate": 4.60905186413165e-05, "loss": 7.0106, "step": 14030 }, { "epoch": 0.11450189155107188, "grad_norm": 0.11625563353300095, "learning_rate": 4.606939517751549e-05, "loss": 7.011, "step": 14040 }, { "epoch": 0.11475409836065574, "grad_norm": 0.1292886734008789, "learning_rate": 4.604826053537776e-05, "loss": 7.0143, "step": 14050 }, { "epoch": 0.1150063051702396, "grad_norm": 0.12828432023525238, "learning_rate": 4.602711472960519e-05, "loss": 7.0205, "step": 14060 }, { "epoch": 0.11525851197982345, "grad_norm": 0.12840358912944794, "learning_rate": 4.600595777490743e-05, "loss": 7.0214, "step": 14070 }, { "epoch": 0.11551071878940732, "grad_norm": 0.13680380582809448, "learning_rate": 4.5984789686001844e-05, "loss": 7.0219, "step": 14080 }, { "epoch": 0.11576292559899118, "grad_norm": 0.11115366965532303, "learning_rate": 4.596361047761358e-05, "loss": 7.0164, "step": 14090 }, { "epoch": 0.11601513240857503, "grad_norm": 0.10105358064174652, "learning_rate": 4.59424201644755e-05, "loss": 7.0064, "step": 14100 }, { "epoch": 0.11626733921815889, "grad_norm": 0.12608879804611206, "learning_rate": 4.592121876132822e-05, "loss": 7.0148, "step": 14110 }, { "epoch": 0.11651954602774275, "grad_norm": 0.11941909790039062, "learning_rate": 4.590000628292003e-05, "loss": 7.0166, "step": 14120 }, { "epoch": 0.1167717528373266, "grad_norm": 0.10672641545534134, "learning_rate": 4.587878274400696e-05, "loss": 7.0089, "step": 14130 }, { "epoch": 0.11702395964691047, "grad_norm": 0.1275121420621872, "learning_rate": 4.5857548159352705e-05, "loss": 7.0096, "step": 14140 }, { "epoch": 0.11727616645649433, "grad_norm": 0.1178092285990715, "learning_rate": 4.583630254372866e-05, "loss": 7.0192, "step": 14150 }, { "epoch": 0.11752837326607818, "grad_norm": 0.099721260368824, "learning_rate": 4.581504591191389e-05, "loss": 7.0067, "step": 14160 }, { "epoch": 0.11778058007566204, "grad_norm": 0.1006883829832077, "learning_rate": 4.5793778278695146e-05, "loss": 7.0059, "step": 14170 }, { "epoch": 0.1180327868852459, "grad_norm": 0.1029214859008789, "learning_rate": 4.5772499658866773e-05, "loss": 6.994, "step": 14180 }, { "epoch": 0.11828499369482975, "grad_norm": 0.091046042740345, "learning_rate": 4.575121006723082e-05, "loss": 6.9961, "step": 14190 }, { "epoch": 0.11853720050441362, "grad_norm": 0.10820841044187546, "learning_rate": 4.5729909518596934e-05, "loss": 6.9957, "step": 14200 }, { "epoch": 0.11878940731399748, "grad_norm": 0.12020259350538254, "learning_rate": 4.570859802778239e-05, "loss": 7.0034, "step": 14210 }, { "epoch": 0.11904161412358134, "grad_norm": 0.12367302924394608, "learning_rate": 4.568727560961209e-05, "loss": 7.0133, "step": 14220 }, { "epoch": 0.11929382093316519, "grad_norm": 0.13015200197696686, "learning_rate": 4.566594227891851e-05, "loss": 7.0175, "step": 14230 }, { "epoch": 0.11954602774274906, "grad_norm": 0.12000835686922073, "learning_rate": 4.564459805054175e-05, "loss": 7.0185, "step": 14240 }, { "epoch": 0.11979823455233292, "grad_norm": 0.1257222294807434, "learning_rate": 4.5623242939329474e-05, "loss": 7.0189, "step": 14250 }, { "epoch": 0.12005044136191677, "grad_norm": 0.12180405855178833, "learning_rate": 4.56018769601369e-05, "loss": 7.015, "step": 14260 }, { "epoch": 0.12030264817150063, "grad_norm": 0.10936957597732544, "learning_rate": 4.558050012782683e-05, "loss": 7.0137, "step": 14270 }, { "epoch": 0.1205548549810845, "grad_norm": 0.11896359920501709, "learning_rate": 4.555911245726961e-05, "loss": 7.0062, "step": 14280 }, { "epoch": 0.12080706179066834, "grad_norm": 0.12019521743059158, "learning_rate": 4.553771396334311e-05, "loss": 7.0108, "step": 14290 }, { "epoch": 0.1210592686002522, "grad_norm": 0.12689203023910522, "learning_rate": 4.551630466093277e-05, "loss": 7.0169, "step": 14300 }, { "epoch": 0.12131147540983607, "grad_norm": 0.1275954395532608, "learning_rate": 4.549488456493149e-05, "loss": 7.0197, "step": 14310 }, { "epoch": 0.12156368221941992, "grad_norm": 0.12618650496006012, "learning_rate": 4.547345369023972e-05, "loss": 7.0236, "step": 14320 }, { "epoch": 0.12181588902900378, "grad_norm": 0.11486010998487473, "learning_rate": 4.545201205176541e-05, "loss": 7.0155, "step": 14330 }, { "epoch": 0.12206809583858765, "grad_norm": 0.1137692928314209, "learning_rate": 4.543055966442397e-05, "loss": 7.0116, "step": 14340 }, { "epoch": 0.1223203026481715, "grad_norm": 0.12385028600692749, "learning_rate": 4.540909654313831e-05, "loss": 7.0114, "step": 14350 }, { "epoch": 0.12257250945775536, "grad_norm": 0.11937599629163742, "learning_rate": 4.5387622702838794e-05, "loss": 7.0136, "step": 14360 }, { "epoch": 0.12282471626733922, "grad_norm": 0.11043809354305267, "learning_rate": 4.536613815846324e-05, "loss": 7.0104, "step": 14370 }, { "epoch": 0.12307692307692308, "grad_norm": 0.12261407822370529, "learning_rate": 4.534464292495694e-05, "loss": 7.0054, "step": 14380 }, { "epoch": 0.12332912988650693, "grad_norm": 0.12694193422794342, "learning_rate": 4.532313701727258e-05, "loss": 7.0146, "step": 14390 }, { "epoch": 0.1235813366960908, "grad_norm": 0.13718228042125702, "learning_rate": 4.53016204503703e-05, "loss": 7.0249, "step": 14400 }, { "epoch": 0.12383354350567466, "grad_norm": 0.11755913496017456, "learning_rate": 4.5280093239217664e-05, "loss": 7.0137, "step": 14410 }, { "epoch": 0.12408575031525851, "grad_norm": 0.1197163537144661, "learning_rate": 4.525855539878959e-05, "loss": 7.0089, "step": 14420 }, { "epoch": 0.12433795712484237, "grad_norm": 0.11093920469284058, "learning_rate": 4.523700694406845e-05, "loss": 7.009, "step": 14430 }, { "epoch": 0.12459016393442623, "grad_norm": 0.10067468881607056, "learning_rate": 4.5215447890043955e-05, "loss": 7.0024, "step": 14440 }, { "epoch": 0.12484237074401008, "grad_norm": 0.1108691543340683, "learning_rate": 4.519387825171323e-05, "loss": 7.0016, "step": 14450 }, { "epoch": 0.12509457755359396, "grad_norm": 0.10741101205348969, "learning_rate": 4.517229804408072e-05, "loss": 7.0076, "step": 14460 }, { "epoch": 0.1253467843631778, "grad_norm": 0.12888409197330475, "learning_rate": 4.515070728215825e-05, "loss": 7.0138, "step": 14470 }, { "epoch": 0.12559899117276166, "grad_norm": 0.12196345627307892, "learning_rate": 4.5129105980964985e-05, "loss": 7.0194, "step": 14480 }, { "epoch": 0.12585119798234554, "grad_norm": 0.12034141272306442, "learning_rate": 4.5107494155527404e-05, "loss": 7.0156, "step": 14490 }, { "epoch": 0.12610340479192939, "grad_norm": 0.12285523861646652, "learning_rate": 4.508587182087933e-05, "loss": 7.018, "step": 14500 }, { "epoch": 0.12635561160151323, "grad_norm": 0.11201721429824829, "learning_rate": 4.506423899206187e-05, "loss": 7.0108, "step": 14510 }, { "epoch": 0.1266078184110971, "grad_norm": 0.10984145104885101, "learning_rate": 4.5042595684123474e-05, "loss": 7.0064, "step": 14520 }, { "epoch": 0.12686002522068096, "grad_norm": 0.12177331000566483, "learning_rate": 4.502094191211982e-05, "loss": 7.0137, "step": 14530 }, { "epoch": 0.1271122320302648, "grad_norm": 0.12053750455379486, "learning_rate": 4.499927769111392e-05, "loss": 7.0121, "step": 14540 }, { "epoch": 0.1273644388398487, "grad_norm": 0.12006832659244537, "learning_rate": 4.497760303617603e-05, "loss": 7.0143, "step": 14550 }, { "epoch": 0.12761664564943254, "grad_norm": 0.1100648045539856, "learning_rate": 4.4955917962383663e-05, "loss": 7.0087, "step": 14560 }, { "epoch": 0.12786885245901639, "grad_norm": 0.12505172193050385, "learning_rate": 4.493422248482159e-05, "loss": 7.0147, "step": 14570 }, { "epoch": 0.12812105926860026, "grad_norm": 0.12379957735538483, "learning_rate": 4.49125166185818e-05, "loss": 7.0181, "step": 14580 }, { "epoch": 0.1283732660781841, "grad_norm": 0.11184598505496979, "learning_rate": 4.489080037876354e-05, "loss": 7.0151, "step": 14590 }, { "epoch": 0.12862547288776796, "grad_norm": 0.124532051384449, "learning_rate": 4.486907378047326e-05, "loss": 7.0175, "step": 14600 }, { "epoch": 0.12887767969735184, "grad_norm": 0.13460932672023773, "learning_rate": 4.4847336838824593e-05, "loss": 7.0222, "step": 14610 }, { "epoch": 0.1291298865069357, "grad_norm": 0.10322148352861404, "learning_rate": 4.482558956893839e-05, "loss": 7.0129, "step": 14620 }, { "epoch": 0.12938209331651954, "grad_norm": 0.09482371062040329, "learning_rate": 4.4803831985942676e-05, "loss": 6.9968, "step": 14630 }, { "epoch": 0.1296343001261034, "grad_norm": 0.10468240082263947, "learning_rate": 4.4782064104972686e-05, "loss": 6.9988, "step": 14640 }, { "epoch": 0.12988650693568726, "grad_norm": 0.11570216715335846, "learning_rate": 4.476028594117075e-05, "loss": 7.0028, "step": 14650 }, { "epoch": 0.1301387137452711, "grad_norm": 0.13197441399097443, "learning_rate": 4.4738497509686406e-05, "loss": 7.014, "step": 14660 }, { "epoch": 0.130390920554855, "grad_norm": 0.10406981408596039, "learning_rate": 4.471669882567632e-05, "loss": 7.0103, "step": 14670 }, { "epoch": 0.13064312736443884, "grad_norm": 0.11888687312602997, "learning_rate": 4.469488990430429e-05, "loss": 7.0098, "step": 14680 }, { "epoch": 0.1308953341740227, "grad_norm": 0.12856970727443695, "learning_rate": 4.4673070760741224e-05, "loss": 7.0171, "step": 14690 }, { "epoch": 0.13114754098360656, "grad_norm": 0.1040421724319458, "learning_rate": 4.4651241410165157e-05, "loss": 7.0022, "step": 14700 }, { "epoch": 0.1313997477931904, "grad_norm": 0.11802996695041656, "learning_rate": 4.462940186776121e-05, "loss": 7.0101, "step": 14710 }, { "epoch": 0.13165195460277426, "grad_norm": 0.11060959100723267, "learning_rate": 4.4607552148721616e-05, "loss": 7.0121, "step": 14720 }, { "epoch": 0.13190416141235814, "grad_norm": 0.12366527318954468, "learning_rate": 4.458569226824565e-05, "loss": 7.0104, "step": 14730 }, { "epoch": 0.132156368221942, "grad_norm": 0.10217763483524323, "learning_rate": 4.4563822241539714e-05, "loss": 6.9996, "step": 14740 }, { "epoch": 0.13240857503152584, "grad_norm": 0.12219315022230148, "learning_rate": 4.4541942083817186e-05, "loss": 7.0055, "step": 14750 }, { "epoch": 0.13266078184110972, "grad_norm": 0.11602602899074554, "learning_rate": 4.452005181029857e-05, "loss": 7.0123, "step": 14760 }, { "epoch": 0.13291298865069356, "grad_norm": 0.12263442575931549, "learning_rate": 4.449815143621138e-05, "loss": 7.0108, "step": 14770 }, { "epoch": 0.13316519546027744, "grad_norm": 0.12329389154911041, "learning_rate": 4.4476240976790116e-05, "loss": 7.0172, "step": 14780 }, { "epoch": 0.1334174022698613, "grad_norm": 0.1161542683839798, "learning_rate": 4.445432044727636e-05, "loss": 7.0169, "step": 14790 }, { "epoch": 0.13366960907944514, "grad_norm": 0.1114954873919487, "learning_rate": 4.4432389862918645e-05, "loss": 7.0086, "step": 14800 }, { "epoch": 0.13392181588902902, "grad_norm": 0.11269606649875641, "learning_rate": 4.4410449238972536e-05, "loss": 7.0113, "step": 14810 }, { "epoch": 0.13417402269861287, "grad_norm": 0.12133883684873581, "learning_rate": 4.438849859070056e-05, "loss": 7.0151, "step": 14820 }, { "epoch": 0.13442622950819672, "grad_norm": 0.11403359472751617, "learning_rate": 4.4366537933372226e-05, "loss": 7.0101, "step": 14830 }, { "epoch": 0.1346784363177806, "grad_norm": 0.11128561943769455, "learning_rate": 4.4344567282264e-05, "loss": 7.0053, "step": 14840 }, { "epoch": 0.13493064312736444, "grad_norm": 0.1077156588435173, "learning_rate": 4.4322586652659306e-05, "loss": 7.0096, "step": 14850 }, { "epoch": 0.1351828499369483, "grad_norm": 0.11573804914951324, "learning_rate": 4.4300596059848505e-05, "loss": 7.0075, "step": 14860 }, { "epoch": 0.13543505674653217, "grad_norm": 0.1148797795176506, "learning_rate": 4.4278595519128895e-05, "loss": 7.0084, "step": 14870 }, { "epoch": 0.13568726355611602, "grad_norm": 0.11898864805698395, "learning_rate": 4.425658504580469e-05, "loss": 7.0105, "step": 14880 }, { "epoch": 0.13593947036569987, "grad_norm": 0.1235952377319336, "learning_rate": 4.4234564655187e-05, "loss": 7.0124, "step": 14890 }, { "epoch": 0.13619167717528374, "grad_norm": 0.10562942922115326, "learning_rate": 4.421253436259386e-05, "loss": 7.0013, "step": 14900 }, { "epoch": 0.1364438839848676, "grad_norm": 0.11567962169647217, "learning_rate": 4.4190494183350175e-05, "loss": 7.0067, "step": 14910 }, { "epoch": 0.13669609079445144, "grad_norm": 0.11789346486330032, "learning_rate": 4.416844413278772e-05, "loss": 7.0064, "step": 14920 }, { "epoch": 0.13694829760403532, "grad_norm": 0.11536281555891037, "learning_rate": 4.4146384226245174e-05, "loss": 7.0101, "step": 14930 }, { "epoch": 0.13720050441361917, "grad_norm": 0.10839681327342987, "learning_rate": 4.412431447906803e-05, "loss": 7.0059, "step": 14940 }, { "epoch": 0.13745271122320302, "grad_norm": 0.1113009974360466, "learning_rate": 4.410223490660864e-05, "loss": 7.0089, "step": 14950 }, { "epoch": 0.1377049180327869, "grad_norm": 0.12575745582580566, "learning_rate": 4.408014552422621e-05, "loss": 7.0142, "step": 14960 }, { "epoch": 0.13795712484237074, "grad_norm": 0.11435794085264206, "learning_rate": 4.405804634728674e-05, "loss": 7.0154, "step": 14970 }, { "epoch": 0.1382093316519546, "grad_norm": 0.10832410305738449, "learning_rate": 4.4035937391163065e-05, "loss": 7.0067, "step": 14980 }, { "epoch": 0.13846153846153847, "grad_norm": 0.12921763956546783, "learning_rate": 4.401381867123481e-05, "loss": 7.0125, "step": 14990 }, { "epoch": 0.13871374527112232, "grad_norm": 0.10728513449430466, "learning_rate": 4.39916902028884e-05, "loss": 7.0119, "step": 15000 }, { "epoch": 0.13896595208070617, "grad_norm": 0.11391110718250275, "learning_rate": 4.3969552001517045e-05, "loss": 7.0086, "step": 15010 }, { "epoch": 0.13921815889029004, "grad_norm": 0.1051936149597168, "learning_rate": 4.3947404082520704e-05, "loss": 7.0041, "step": 15020 }, { "epoch": 0.1394703656998739, "grad_norm": 0.11372477561235428, "learning_rate": 4.392524646130613e-05, "loss": 7.0031, "step": 15030 }, { "epoch": 0.13972257250945774, "grad_norm": 0.11591523140668869, "learning_rate": 4.390307915328678e-05, "loss": 7.0047, "step": 15040 }, { "epoch": 0.13997477931904162, "grad_norm": 0.1110263392329216, "learning_rate": 4.3880902173882895e-05, "loss": 7.0094, "step": 15050 }, { "epoch": 0.14022698612862547, "grad_norm": 0.11770636588335037, "learning_rate": 4.3858715538521425e-05, "loss": 7.0091, "step": 15060 }, { "epoch": 0.14047919293820932, "grad_norm": 0.11594939976930618, "learning_rate": 4.3836519262636035e-05, "loss": 7.009, "step": 15070 }, { "epoch": 0.1407313997477932, "grad_norm": 0.10995303094387054, "learning_rate": 4.381431336166709e-05, "loss": 7.0027, "step": 15080 }, { "epoch": 0.14098360655737704, "grad_norm": 0.1028287336230278, "learning_rate": 4.379209785106166e-05, "loss": 7.0054, "step": 15090 }, { "epoch": 0.14123581336696092, "grad_norm": 0.10080797225236893, "learning_rate": 4.376987274627349e-05, "loss": 6.9986, "step": 15100 }, { "epoch": 0.14148802017654477, "grad_norm": 0.11648160964250565, "learning_rate": 4.374763806276304e-05, "loss": 7.0046, "step": 15110 }, { "epoch": 0.14174022698612862, "grad_norm": 0.11777978390455246, "learning_rate": 4.3725393815997356e-05, "loss": 7.0173, "step": 15120 }, { "epoch": 0.1419924337957125, "grad_norm": 0.12924890220165253, "learning_rate": 4.37031400214502e-05, "loss": 7.0194, "step": 15130 }, { "epoch": 0.14224464060529635, "grad_norm": 0.12583915889263153, "learning_rate": 4.368087669460197e-05, "loss": 7.0235, "step": 15140 }, { "epoch": 0.1424968474148802, "grad_norm": 0.1306135058403015, "learning_rate": 4.365860385093965e-05, "loss": 7.0235, "step": 15150 }, { "epoch": 0.14274905422446407, "grad_norm": 0.12636564671993256, "learning_rate": 4.3636321505956915e-05, "loss": 7.022, "step": 15160 }, { "epoch": 0.14300126103404792, "grad_norm": 0.12276087701320648, "learning_rate": 4.361402967515397e-05, "loss": 7.0211, "step": 15170 }, { "epoch": 0.14325346784363177, "grad_norm": 0.1237574890255928, "learning_rate": 4.3591728374037676e-05, "loss": 7.0171, "step": 15180 }, { "epoch": 0.14350567465321565, "grad_norm": 0.11635987460613251, "learning_rate": 4.356941761812146e-05, "loss": 7.0147, "step": 15190 }, { "epoch": 0.1437578814627995, "grad_norm": 0.12257160246372223, "learning_rate": 4.354709742292533e-05, "loss": 7.0123, "step": 15200 }, { "epoch": 0.14401008827238335, "grad_norm": 0.1197953149676323, "learning_rate": 4.352476780397588e-05, "loss": 7.0157, "step": 15210 }, { "epoch": 0.14426229508196722, "grad_norm": 0.1038358062505722, "learning_rate": 4.350242877680621e-05, "loss": 7.0006, "step": 15220 }, { "epoch": 0.14451450189155107, "grad_norm": 0.1092543751001358, "learning_rate": 4.3480080356956004e-05, "loss": 6.9977, "step": 15230 }, { "epoch": 0.14476670870113492, "grad_norm": 0.10290288925170898, "learning_rate": 4.3457722559971485e-05, "loss": 7.004, "step": 15240 }, { "epoch": 0.1450189155107188, "grad_norm": 0.10887181013822556, "learning_rate": 4.343535540140537e-05, "loss": 7.0059, "step": 15250 }, { "epoch": 0.14527112232030265, "grad_norm": 0.10834600031375885, "learning_rate": 4.34129788968169e-05, "loss": 7.0079, "step": 15260 }, { "epoch": 0.1455233291298865, "grad_norm": 0.10003656893968582, "learning_rate": 4.339059306177183e-05, "loss": 6.999, "step": 15270 }, { "epoch": 0.14577553593947037, "grad_norm": 0.10638033598661423, "learning_rate": 4.336819791184237e-05, "loss": 7.0019, "step": 15280 }, { "epoch": 0.14602774274905422, "grad_norm": 0.11522512882947922, "learning_rate": 4.334579346260728e-05, "loss": 7.0077, "step": 15290 }, { "epoch": 0.14627994955863807, "grad_norm": 0.11828053742647171, "learning_rate": 4.3323379729651706e-05, "loss": 7.0122, "step": 15300 }, { "epoch": 0.14653215636822195, "grad_norm": 0.10348565876483917, "learning_rate": 4.33009567285673e-05, "loss": 7.01, "step": 15310 }, { "epoch": 0.1467843631778058, "grad_norm": 0.09378072619438171, "learning_rate": 4.3278524474952165e-05, "loss": 7.0, "step": 15320 }, { "epoch": 0.14703656998738965, "grad_norm": 0.1121789813041687, "learning_rate": 4.32560829844108e-05, "loss": 7.0044, "step": 15330 }, { "epoch": 0.14728877679697353, "grad_norm": 0.09856068342924118, "learning_rate": 4.323363227255418e-05, "loss": 6.9984, "step": 15340 }, { "epoch": 0.14754098360655737, "grad_norm": 0.10420440882444382, "learning_rate": 4.321117235499966e-05, "loss": 6.9969, "step": 15350 }, { "epoch": 0.14779319041614122, "grad_norm": 0.10563419759273529, "learning_rate": 4.3188703247370995e-05, "loss": 6.9996, "step": 15360 }, { "epoch": 0.1480453972257251, "grad_norm": 0.11881881207227707, "learning_rate": 4.316622496529838e-05, "loss": 7.011, "step": 15370 }, { "epoch": 0.14829760403530895, "grad_norm": 0.12180481851100922, "learning_rate": 4.3143737524418345e-05, "loss": 7.0177, "step": 15380 }, { "epoch": 0.1485498108448928, "grad_norm": 0.12017661333084106, "learning_rate": 4.312124094037379e-05, "loss": 7.0197, "step": 15390 }, { "epoch": 0.14880201765447668, "grad_norm": 0.1205824688076973, "learning_rate": 4.3098735228814e-05, "loss": 7.016, "step": 15400 }, { "epoch": 0.14905422446406053, "grad_norm": 0.11189217120409012, "learning_rate": 4.3076220405394604e-05, "loss": 7.0154, "step": 15410 }, { "epoch": 0.1493064312736444, "grad_norm": 0.11641347408294678, "learning_rate": 4.305369648577757e-05, "loss": 7.0148, "step": 15420 }, { "epoch": 0.14955863808322825, "grad_norm": 0.10011029988527298, "learning_rate": 4.303116348563116e-05, "loss": 6.9989, "step": 15430 }, { "epoch": 0.1498108448928121, "grad_norm": 0.10609572380781174, "learning_rate": 4.300862142063002e-05, "loss": 7.0013, "step": 15440 }, { "epoch": 0.15006305170239598, "grad_norm": 0.10286098718643188, "learning_rate": 4.2986070306455046e-05, "loss": 7.0022, "step": 15450 }, { "epoch": 0.15031525851197983, "grad_norm": 0.12421179562807083, "learning_rate": 4.296351015879344e-05, "loss": 7.0156, "step": 15460 }, { "epoch": 0.15056746532156368, "grad_norm": 0.11100196838378906, "learning_rate": 4.294094099333871e-05, "loss": 7.0126, "step": 15470 }, { "epoch": 0.15081967213114755, "grad_norm": 0.1120496317744255, "learning_rate": 4.29183628257906e-05, "loss": 7.0085, "step": 15480 }, { "epoch": 0.1510718789407314, "grad_norm": 0.11131096631288528, "learning_rate": 4.289577567185516e-05, "loss": 7.0101, "step": 15490 }, { "epoch": 0.15132408575031525, "grad_norm": 0.1059059426188469, "learning_rate": 4.287317954724466e-05, "loss": 7.0068, "step": 15500 }, { "epoch": 0.15157629255989913, "grad_norm": 0.12128554284572601, "learning_rate": 4.285057446767761e-05, "loss": 7.0078, "step": 15510 }, { "epoch": 0.15182849936948298, "grad_norm": 0.11580874025821686, "learning_rate": 4.2827960448878755e-05, "loss": 7.0121, "step": 15520 }, { "epoch": 0.15208070617906683, "grad_norm": 0.10521107167005539, "learning_rate": 4.280533750657909e-05, "loss": 7.0074, "step": 15530 }, { "epoch": 0.1523329129886507, "grad_norm": 0.10503160208463669, "learning_rate": 4.2782705656515756e-05, "loss": 7.004, "step": 15540 }, { "epoch": 0.15258511979823455, "grad_norm": 0.11529470980167389, "learning_rate": 4.2760064914432144e-05, "loss": 7.0118, "step": 15550 }, { "epoch": 0.1528373266078184, "grad_norm": 0.10214810818433762, "learning_rate": 4.2737415296077795e-05, "loss": 7.0047, "step": 15560 }, { "epoch": 0.15308953341740228, "grad_norm": 0.10718594491481781, "learning_rate": 4.271475681720845e-05, "loss": 7.0021, "step": 15570 }, { "epoch": 0.15334174022698613, "grad_norm": 0.11566676199436188, "learning_rate": 4.269208949358601e-05, "loss": 7.0027, "step": 15580 }, { "epoch": 0.15359394703656998, "grad_norm": 0.12218378484249115, "learning_rate": 4.2669413340978504e-05, "loss": 7.0059, "step": 15590 }, { "epoch": 0.15384615384615385, "grad_norm": 0.10079603642225266, "learning_rate": 4.264672837516013e-05, "loss": 7.0076, "step": 15600 }, { "epoch": 0.1540983606557377, "grad_norm": 0.09170334786176682, "learning_rate": 4.262403461191121e-05, "loss": 7.0001, "step": 15610 }, { "epoch": 0.15435056746532155, "grad_norm": 0.08506650477647781, "learning_rate": 4.260133206701817e-05, "loss": 6.9897, "step": 15620 }, { "epoch": 0.15460277427490543, "grad_norm": 0.09536772966384888, "learning_rate": 4.2578620756273584e-05, "loss": 6.9976, "step": 15630 }, { "epoch": 0.15485498108448928, "grad_norm": 0.11179976910352707, "learning_rate": 4.255590069547607e-05, "loss": 7.0081, "step": 15640 }, { "epoch": 0.15510718789407313, "grad_norm": 0.10680782049894333, "learning_rate": 4.253317190043037e-05, "loss": 7.0093, "step": 15650 }, { "epoch": 0.155359394703657, "grad_norm": 0.09551562368869781, "learning_rate": 4.2510434386947295e-05, "loss": 7.0031, "step": 15660 }, { "epoch": 0.15561160151324085, "grad_norm": 0.10451715439558029, "learning_rate": 4.248768817084372e-05, "loss": 7.0037, "step": 15670 }, { "epoch": 0.1558638083228247, "grad_norm": 0.10719973593950272, "learning_rate": 4.246493326794256e-05, "loss": 7.0121, "step": 15680 }, { "epoch": 0.15611601513240858, "grad_norm": 0.11794453859329224, "learning_rate": 4.24421696940728e-05, "loss": 7.0104, "step": 15690 }, { "epoch": 0.15636822194199243, "grad_norm": 0.10505248606204987, "learning_rate": 4.2419397465069424e-05, "loss": 6.9989, "step": 15700 }, { "epoch": 0.15662042875157628, "grad_norm": 0.11368047446012497, "learning_rate": 4.2396616596773475e-05, "loss": 7.0071, "step": 15710 }, { "epoch": 0.15687263556116016, "grad_norm": 0.11622785031795502, "learning_rate": 4.237382710503196e-05, "loss": 7.0099, "step": 15720 }, { "epoch": 0.157124842370744, "grad_norm": 0.09782916307449341, "learning_rate": 4.235102900569794e-05, "loss": 7.0077, "step": 15730 }, { "epoch": 0.15737704918032788, "grad_norm": 0.11070048809051514, "learning_rate": 4.23282223146304e-05, "loss": 7.0067, "step": 15740 }, { "epoch": 0.15762925598991173, "grad_norm": 0.10315890610218048, "learning_rate": 4.230540704769436e-05, "loss": 7.0036, "step": 15750 }, { "epoch": 0.15788146279949558, "grad_norm": 0.10521839559078217, "learning_rate": 4.228258322076078e-05, "loss": 7.0081, "step": 15760 }, { "epoch": 0.15813366960907946, "grad_norm": 0.11457584798336029, "learning_rate": 4.225975084970655e-05, "loss": 7.0057, "step": 15770 }, { "epoch": 0.1583858764186633, "grad_norm": 0.11060737073421478, "learning_rate": 4.223690995041456e-05, "loss": 7.0067, "step": 15780 }, { "epoch": 0.15863808322824716, "grad_norm": 0.11623918265104294, "learning_rate": 4.221406053877358e-05, "loss": 7.0119, "step": 15790 }, { "epoch": 0.15889029003783103, "grad_norm": 0.11131274700164795, "learning_rate": 4.219120263067832e-05, "loss": 7.0093, "step": 15800 }, { "epoch": 0.15914249684741488, "grad_norm": 0.11486254632472992, "learning_rate": 4.2168336242029425e-05, "loss": 7.0089, "step": 15810 }, { "epoch": 0.15939470365699873, "grad_norm": 0.10987652093172073, "learning_rate": 4.2145461388733396e-05, "loss": 7.0087, "step": 15820 }, { "epoch": 0.1596469104665826, "grad_norm": 0.11201102286577225, "learning_rate": 4.2122578086702654e-05, "loss": 7.0095, "step": 15830 }, { "epoch": 0.15989911727616646, "grad_norm": 0.10718180239200592, "learning_rate": 4.209968635185548e-05, "loss": 7.006, "step": 15840 }, { "epoch": 0.1601513240857503, "grad_norm": 0.1179196685552597, "learning_rate": 4.207678620011604e-05, "loss": 7.0064, "step": 15850 }, { "epoch": 0.16040353089533418, "grad_norm": 0.1049872562289238, "learning_rate": 4.205387764741434e-05, "loss": 7.0059, "step": 15860 }, { "epoch": 0.16065573770491803, "grad_norm": 0.0954735204577446, "learning_rate": 4.203096070968622e-05, "loss": 6.9986, "step": 15870 }, { "epoch": 0.16090794451450188, "grad_norm": 0.103877492249012, "learning_rate": 4.200803540287336e-05, "loss": 7.0013, "step": 15880 }, { "epoch": 0.16116015132408576, "grad_norm": 0.11135512590408325, "learning_rate": 4.198510174292331e-05, "loss": 7.0006, "step": 15890 }, { "epoch": 0.1614123581336696, "grad_norm": 0.11635250598192215, "learning_rate": 4.196215974578933e-05, "loss": 7.0043, "step": 15900 }, { "epoch": 0.16166456494325346, "grad_norm": 0.11099542677402496, "learning_rate": 4.193920942743058e-05, "loss": 7.0074, "step": 15910 }, { "epoch": 0.16191677175283734, "grad_norm": 0.10195672512054443, "learning_rate": 4.191625080381195e-05, "loss": 7.0015, "step": 15920 }, { "epoch": 0.16216897856242118, "grad_norm": 0.11140645295381546, "learning_rate": 4.189328389090412e-05, "loss": 7.0042, "step": 15930 }, { "epoch": 0.16242118537200503, "grad_norm": 0.10792161524295807, "learning_rate": 4.1870308704683565e-05, "loss": 7.0089, "step": 15940 }, { "epoch": 0.1626733921815889, "grad_norm": 0.09852690249681473, "learning_rate": 4.184732526113245e-05, "loss": 7.0019, "step": 15950 }, { "epoch": 0.16292559899117276, "grad_norm": 0.09102994203567505, "learning_rate": 4.182433357623875e-05, "loss": 6.9971, "step": 15960 }, { "epoch": 0.1631778058007566, "grad_norm": 0.09793549031019211, "learning_rate": 4.1801333665996154e-05, "loss": 6.9968, "step": 15970 }, { "epoch": 0.16343001261034049, "grad_norm": 0.1065673902630806, "learning_rate": 4.177832554640404e-05, "loss": 7.0026, "step": 15980 }, { "epoch": 0.16368221941992434, "grad_norm": 0.1151038333773613, "learning_rate": 4.175530923346754e-05, "loss": 7.009, "step": 15990 }, { "epoch": 0.16393442622950818, "grad_norm": 0.09202979505062103, "learning_rate": 4.173228474319748e-05, "loss": 7.0032, "step": 16000 }, { "epoch": 0.16418663303909206, "grad_norm": 0.10128271579742432, "learning_rate": 4.1709252091610326e-05, "loss": 7.0016, "step": 16010 }, { "epoch": 0.1644388398486759, "grad_norm": 0.10640598088502884, "learning_rate": 4.168621129472828e-05, "loss": 7.0083, "step": 16020 }, { "epoch": 0.16469104665825976, "grad_norm": 0.1077645868062973, "learning_rate": 4.1663162368579184e-05, "loss": 7.0132, "step": 16030 }, { "epoch": 0.16494325346784364, "grad_norm": 0.10020864754915237, "learning_rate": 4.1640105329196544e-05, "loss": 7.0111, "step": 16040 }, { "epoch": 0.16519546027742749, "grad_norm": 0.11751843988895416, "learning_rate": 4.161704019261951e-05, "loss": 7.0109, "step": 16050 }, { "epoch": 0.16544766708701136, "grad_norm": 0.10692572593688965, "learning_rate": 4.159396697489284e-05, "loss": 7.0087, "step": 16060 }, { "epoch": 0.1656998738965952, "grad_norm": 0.11405492573976517, "learning_rate": 4.157088569206693e-05, "loss": 7.004, "step": 16070 }, { "epoch": 0.16595208070617906, "grad_norm": 0.09656356275081635, "learning_rate": 4.154779636019781e-05, "loss": 6.9998, "step": 16080 }, { "epoch": 0.16620428751576294, "grad_norm": 0.11184573173522949, "learning_rate": 4.152469899534709e-05, "loss": 7.0078, "step": 16090 }, { "epoch": 0.1664564943253468, "grad_norm": 0.11444193124771118, "learning_rate": 4.150159361358194e-05, "loss": 7.0095, "step": 16100 }, { "epoch": 0.16670870113493064, "grad_norm": 0.11603325605392456, "learning_rate": 4.1478480230975156e-05, "loss": 7.0104, "step": 16110 }, { "epoch": 0.16696090794451451, "grad_norm": 0.10411125421524048, "learning_rate": 4.1455358863605074e-05, "loss": 7.0042, "step": 16120 }, { "epoch": 0.16721311475409836, "grad_norm": 0.10202950239181519, "learning_rate": 4.143222952755557e-05, "loss": 7.0018, "step": 16130 }, { "epoch": 0.1674653215636822, "grad_norm": 0.10056120902299881, "learning_rate": 4.140909223891609e-05, "loss": 7.0015, "step": 16140 }, { "epoch": 0.1677175283732661, "grad_norm": 0.10816866159439087, "learning_rate": 4.1385947013781614e-05, "loss": 7.0029, "step": 16150 }, { "epoch": 0.16796973518284994, "grad_norm": 0.11281464993953705, "learning_rate": 4.1362793868252604e-05, "loss": 7.0015, "step": 16160 }, { "epoch": 0.1682219419924338, "grad_norm": 0.09754366427659988, "learning_rate": 4.1339632818435076e-05, "loss": 6.993, "step": 16170 }, { "epoch": 0.16847414880201766, "grad_norm": 0.115234375, "learning_rate": 4.131646388044054e-05, "loss": 7.0009, "step": 16180 }, { "epoch": 0.16872635561160151, "grad_norm": 0.11331181228160858, "learning_rate": 4.129328707038596e-05, "loss": 7.0044, "step": 16190 }, { "epoch": 0.16897856242118536, "grad_norm": 0.10434580594301224, "learning_rate": 4.1270102404393806e-05, "loss": 7.0042, "step": 16200 }, { "epoch": 0.16923076923076924, "grad_norm": 0.10664616525173187, "learning_rate": 4.124690989859199e-05, "loss": 7.0046, "step": 16210 }, { "epoch": 0.1694829760403531, "grad_norm": 0.11801372468471527, "learning_rate": 4.1223709569113896e-05, "loss": 7.0051, "step": 16220 }, { "epoch": 0.16973518284993694, "grad_norm": 0.10781467705965042, "learning_rate": 4.1200501432098356e-05, "loss": 7.0054, "step": 16230 }, { "epoch": 0.16998738965952082, "grad_norm": 0.11129892617464066, "learning_rate": 4.117728550368963e-05, "loss": 7.0076, "step": 16240 }, { "epoch": 0.17023959646910466, "grad_norm": 0.12168590724468231, "learning_rate": 4.115406180003735e-05, "loss": 7.0123, "step": 16250 }, { "epoch": 0.17049180327868851, "grad_norm": 0.09774010628461838, "learning_rate": 4.113083033729664e-05, "loss": 6.998, "step": 16260 }, { "epoch": 0.1707440100882724, "grad_norm": 0.09860575944185257, "learning_rate": 4.110759113162796e-05, "loss": 6.9934, "step": 16270 }, { "epoch": 0.17099621689785624, "grad_norm": 0.09320581704378128, "learning_rate": 4.108434419919717e-05, "loss": 6.998, "step": 16280 }, { "epoch": 0.1712484237074401, "grad_norm": 0.09252901375293732, "learning_rate": 4.106108955617552e-05, "loss": 6.9921, "step": 16290 }, { "epoch": 0.17150063051702397, "grad_norm": 0.10304354131221771, "learning_rate": 4.10378272187396e-05, "loss": 6.994, "step": 16300 }, { "epoch": 0.17175283732660782, "grad_norm": 0.10043667256832123, "learning_rate": 4.1014557203071386e-05, "loss": 7.0073, "step": 16310 }, { "epoch": 0.17200504413619166, "grad_norm": 0.09884664416313171, "learning_rate": 4.099127952535815e-05, "loss": 7.0043, "step": 16320 }, { "epoch": 0.17225725094577554, "grad_norm": 0.09118079394102097, "learning_rate": 4.096799420179254e-05, "loss": 7.0025, "step": 16330 }, { "epoch": 0.1725094577553594, "grad_norm": 0.09523171931505203, "learning_rate": 4.094470124857248e-05, "loss": 6.9965, "step": 16340 }, { "epoch": 0.17276166456494324, "grad_norm": 0.10350441932678223, "learning_rate": 4.092140068190125e-05, "loss": 6.9989, "step": 16350 }, { "epoch": 0.17301387137452712, "grad_norm": 0.10515157878398895, "learning_rate": 4.089809251798739e-05, "loss": 7.0036, "step": 16360 }, { "epoch": 0.17326607818411097, "grad_norm": 0.11102371662855148, "learning_rate": 4.087477677304472e-05, "loss": 7.0082, "step": 16370 }, { "epoch": 0.17351828499369484, "grad_norm": 0.1119445413351059, "learning_rate": 4.085145346329235e-05, "loss": 7.0141, "step": 16380 }, { "epoch": 0.1737704918032787, "grad_norm": 0.10726510733366013, "learning_rate": 4.0828122604954675e-05, "loss": 7.0051, "step": 16390 }, { "epoch": 0.17402269861286254, "grad_norm": 0.11659623682498932, "learning_rate": 4.080478421426129e-05, "loss": 7.0095, "step": 16400 }, { "epoch": 0.17427490542244642, "grad_norm": 0.11123670637607574, "learning_rate": 4.0781438307447054e-05, "loss": 7.0115, "step": 16410 }, { "epoch": 0.17452711223203027, "grad_norm": 0.11421597003936768, "learning_rate": 4.075808490075207e-05, "loss": 7.0081, "step": 16420 }, { "epoch": 0.17477931904161412, "grad_norm": 0.10416650772094727, "learning_rate": 4.0734724010421636e-05, "loss": 7.0017, "step": 16430 }, { "epoch": 0.175031525851198, "grad_norm": 0.10879786312580109, "learning_rate": 4.071135565270627e-05, "loss": 7.0022, "step": 16440 }, { "epoch": 0.17528373266078184, "grad_norm": 0.1043776199221611, "learning_rate": 4.068797984386167e-05, "loss": 7.006, "step": 16450 }, { "epoch": 0.1755359394703657, "grad_norm": 0.11515956372022629, "learning_rate": 4.066459660014874e-05, "loss": 7.0105, "step": 16460 }, { "epoch": 0.17578814627994957, "grad_norm": 0.1039847657084465, "learning_rate": 4.064120593783353e-05, "loss": 7.0075, "step": 16470 }, { "epoch": 0.17604035308953342, "grad_norm": 0.11172743141651154, "learning_rate": 4.061780787318725e-05, "loss": 7.0063, "step": 16480 }, { "epoch": 0.17629255989911727, "grad_norm": 0.10360013693571091, "learning_rate": 4.0594402422486317e-05, "loss": 7.0058, "step": 16490 }, { "epoch": 0.17654476670870115, "grad_norm": 0.08543200045824051, "learning_rate": 4.0570989602012186e-05, "loss": 6.993, "step": 16500 }, { "epoch": 0.176796973518285, "grad_norm": 0.09365221858024597, "learning_rate": 4.0547569428051515e-05, "loss": 6.9929, "step": 16510 }, { "epoch": 0.17704918032786884, "grad_norm": 0.11247263848781586, "learning_rate": 4.052414191689606e-05, "loss": 7.0032, "step": 16520 }, { "epoch": 0.17730138713745272, "grad_norm": 0.0932311937212944, "learning_rate": 4.050070708484267e-05, "loss": 7.0069, "step": 16530 }, { "epoch": 0.17755359394703657, "grad_norm": 0.0967860221862793, "learning_rate": 4.0477264948193314e-05, "loss": 6.9929, "step": 16540 }, { "epoch": 0.17780580075662042, "grad_norm": 0.09428240358829498, "learning_rate": 4.045381552325498e-05, "loss": 6.9938, "step": 16550 }, { "epoch": 0.1780580075662043, "grad_norm": 0.10936015099287033, "learning_rate": 4.04303588263398e-05, "loss": 7.0002, "step": 16560 }, { "epoch": 0.17831021437578815, "grad_norm": 0.10343795269727707, "learning_rate": 4.040689487376493e-05, "loss": 7.0012, "step": 16570 }, { "epoch": 0.178562421185372, "grad_norm": 0.10412423312664032, "learning_rate": 4.038342368185255e-05, "loss": 7.0019, "step": 16580 }, { "epoch": 0.17881462799495587, "grad_norm": 0.10830443352460861, "learning_rate": 4.0359945266929934e-05, "loss": 7.0044, "step": 16590 }, { "epoch": 0.17906683480453972, "grad_norm": 0.10987593233585358, "learning_rate": 4.033645964532933e-05, "loss": 7.0059, "step": 16600 }, { "epoch": 0.17931904161412357, "grad_norm": 0.09905887395143509, "learning_rate": 4.031296683338801e-05, "loss": 7.0057, "step": 16610 }, { "epoch": 0.17957124842370745, "grad_norm": 0.11479998379945755, "learning_rate": 4.0289466847448266e-05, "loss": 7.0042, "step": 16620 }, { "epoch": 0.1798234552332913, "grad_norm": 0.10157646238803864, "learning_rate": 4.026595970385737e-05, "loss": 7.0083, "step": 16630 }, { "epoch": 0.18007566204287515, "grad_norm": 0.10554708540439606, "learning_rate": 4.024244541896756e-05, "loss": 7.0045, "step": 16640 }, { "epoch": 0.18032786885245902, "grad_norm": 0.10272038727998734, "learning_rate": 4.021892400913606e-05, "loss": 6.9995, "step": 16650 }, { "epoch": 0.18058007566204287, "grad_norm": 0.11475597321987152, "learning_rate": 4.0195395490725046e-05, "loss": 7.0067, "step": 16660 }, { "epoch": 0.18083228247162672, "grad_norm": 0.10084796696901321, "learning_rate": 4.0171859880101644e-05, "loss": 7.0055, "step": 16670 }, { "epoch": 0.1810844892812106, "grad_norm": 0.09776605665683746, "learning_rate": 4.014831719363789e-05, "loss": 7.0019, "step": 16680 }, { "epoch": 0.18133669609079445, "grad_norm": 0.09570056945085526, "learning_rate": 4.012476744771076e-05, "loss": 6.9988, "step": 16690 }, { "epoch": 0.18158890290037832, "grad_norm": 0.10291571170091629, "learning_rate": 4.010121065870216e-05, "loss": 7.0026, "step": 16700 }, { "epoch": 0.18184110970996217, "grad_norm": 0.11162550747394562, "learning_rate": 4.007764684299885e-05, "loss": 7.0047, "step": 16710 }, { "epoch": 0.18209331651954602, "grad_norm": 0.10547913610935211, "learning_rate": 4.0054076016992525e-05, "loss": 7.0074, "step": 16720 }, { "epoch": 0.1823455233291299, "grad_norm": 0.09919074177742004, "learning_rate": 4.003049819707972e-05, "loss": 7.0048, "step": 16730 }, { "epoch": 0.18259773013871375, "grad_norm": 0.10127101838588715, "learning_rate": 4.0006913399661844e-05, "loss": 7.0018, "step": 16740 }, { "epoch": 0.1828499369482976, "grad_norm": 0.08531259745359421, "learning_rate": 3.998332164114518e-05, "loss": 6.9934, "step": 16750 }, { "epoch": 0.18310214375788147, "grad_norm": 0.0939788743853569, "learning_rate": 3.995972293794083e-05, "loss": 6.9988, "step": 16760 }, { "epoch": 0.18335435056746532, "grad_norm": 0.10111833363771439, "learning_rate": 3.993611730646473e-05, "loss": 7.0025, "step": 16770 }, { "epoch": 0.18360655737704917, "grad_norm": 0.09774357825517654, "learning_rate": 3.991250476313766e-05, "loss": 6.9969, "step": 16780 }, { "epoch": 0.18385876418663305, "grad_norm": 0.09971390664577484, "learning_rate": 3.988888532438517e-05, "loss": 7.0012, "step": 16790 }, { "epoch": 0.1841109709962169, "grad_norm": 0.08583778887987137, "learning_rate": 3.9865259006637635e-05, "loss": 6.9961, "step": 16800 }, { "epoch": 0.18436317780580075, "grad_norm": 0.08340590447187424, "learning_rate": 3.98416258263302e-05, "loss": 6.9859, "step": 16810 }, { "epoch": 0.18461538461538463, "grad_norm": 0.09336794167757034, "learning_rate": 3.9817985799902804e-05, "loss": 6.9883, "step": 16820 }, { "epoch": 0.18486759142496847, "grad_norm": 0.10720760375261307, "learning_rate": 3.9794338943800116e-05, "loss": 6.9996, "step": 16830 }, { "epoch": 0.18511979823455232, "grad_norm": 0.10585761815309525, "learning_rate": 3.977068527447158e-05, "loss": 7.0072, "step": 16840 }, { "epoch": 0.1853720050441362, "grad_norm": 0.11035221070051193, "learning_rate": 3.9747024808371385e-05, "loss": 7.0091, "step": 16850 }, { "epoch": 0.18562421185372005, "grad_norm": 0.09950698167085648, "learning_rate": 3.972335756195842e-05, "loss": 7.0051, "step": 16860 }, { "epoch": 0.1858764186633039, "grad_norm": 0.09373373538255692, "learning_rate": 3.969968355169631e-05, "loss": 6.9966, "step": 16870 }, { "epoch": 0.18612862547288778, "grad_norm": 0.10355626046657562, "learning_rate": 3.96760027940534e-05, "loss": 6.9978, "step": 16880 }, { "epoch": 0.18638083228247163, "grad_norm": 0.10366398096084595, "learning_rate": 3.965231530550268e-05, "loss": 7.0078, "step": 16890 }, { "epoch": 0.18663303909205547, "grad_norm": 0.10907870531082153, "learning_rate": 3.962862110252187e-05, "loss": 7.0057, "step": 16900 }, { "epoch": 0.18688524590163935, "grad_norm": 0.10080807656049728, "learning_rate": 3.960492020159335e-05, "loss": 7.0035, "step": 16910 }, { "epoch": 0.1871374527112232, "grad_norm": 0.09524153918027878, "learning_rate": 3.958121261920415e-05, "loss": 7.0016, "step": 16920 }, { "epoch": 0.18738965952080705, "grad_norm": 0.08979348838329315, "learning_rate": 3.955749837184595e-05, "loss": 6.9966, "step": 16930 }, { "epoch": 0.18764186633039093, "grad_norm": 0.09233015775680542, "learning_rate": 3.9533777476015055e-05, "loss": 6.9924, "step": 16940 }, { "epoch": 0.18789407313997478, "grad_norm": 0.09216535091400146, "learning_rate": 3.951004994821242e-05, "loss": 6.9963, "step": 16950 }, { "epoch": 0.18814627994955863, "grad_norm": 0.09425920993089676, "learning_rate": 3.9486315804943605e-05, "loss": 6.9939, "step": 16960 }, { "epoch": 0.1883984867591425, "grad_norm": 0.10914503037929535, "learning_rate": 3.9462575062718747e-05, "loss": 6.9992, "step": 16970 }, { "epoch": 0.18865069356872635, "grad_norm": 0.10159265995025635, "learning_rate": 3.9438827738052624e-05, "loss": 7.0078, "step": 16980 }, { "epoch": 0.1889029003783102, "grad_norm": 0.09171033650636673, "learning_rate": 3.941507384746454e-05, "loss": 7.0001, "step": 16990 }, { "epoch": 0.18915510718789408, "grad_norm": 0.10420990735292435, "learning_rate": 3.93913134074784e-05, "loss": 7.0046, "step": 17000 }, { "epoch": 0.18940731399747793, "grad_norm": 0.11047424376010895, "learning_rate": 3.9367546434622646e-05, "loss": 7.0097, "step": 17010 }, { "epoch": 0.1896595208070618, "grad_norm": 0.10324215888977051, "learning_rate": 3.934377294543026e-05, "loss": 7.0062, "step": 17020 }, { "epoch": 0.18991172761664565, "grad_norm": 0.10437892377376556, "learning_rate": 3.931999295643881e-05, "loss": 7.0044, "step": 17030 }, { "epoch": 0.1901639344262295, "grad_norm": 0.09950092434883118, "learning_rate": 3.929620648419031e-05, "loss": 7.0024, "step": 17040 }, { "epoch": 0.19041614123581338, "grad_norm": 0.09724681824445724, "learning_rate": 3.927241354523133e-05, "loss": 6.9994, "step": 17050 }, { "epoch": 0.19066834804539723, "grad_norm": 0.09033042192459106, "learning_rate": 3.9248614156112936e-05, "loss": 6.9926, "step": 17060 }, { "epoch": 0.19092055485498108, "grad_norm": 0.08438222855329514, "learning_rate": 3.922480833339065e-05, "loss": 6.9932, "step": 17070 }, { "epoch": 0.19117276166456496, "grad_norm": 0.1112145259976387, "learning_rate": 3.920099609362451e-05, "loss": 7.0026, "step": 17080 }, { "epoch": 0.1914249684741488, "grad_norm": 0.10640944540500641, "learning_rate": 3.9177177453379e-05, "loss": 7.0071, "step": 17090 }, { "epoch": 0.19167717528373265, "grad_norm": 0.10960220545530319, "learning_rate": 3.9153352429223055e-05, "loss": 7.0085, "step": 17100 }, { "epoch": 0.19192938209331653, "grad_norm": 0.10787414014339447, "learning_rate": 3.912952103773004e-05, "loss": 7.0058, "step": 17110 }, { "epoch": 0.19218158890290038, "grad_norm": 0.1054353192448616, "learning_rate": 3.9105683295477775e-05, "loss": 7.0056, "step": 17120 }, { "epoch": 0.19243379571248423, "grad_norm": 0.10124365985393524, "learning_rate": 3.9081839219048476e-05, "loss": 7.0007, "step": 17130 }, { "epoch": 0.1926860025220681, "grad_norm": 0.07704310864210129, "learning_rate": 3.905798882502877e-05, "loss": 6.992, "step": 17140 }, { "epoch": 0.19293820933165196, "grad_norm": 0.08658108115196228, "learning_rate": 3.90341321300097e-05, "loss": 6.9957, "step": 17150 }, { "epoch": 0.1931904161412358, "grad_norm": 0.09134450554847717, "learning_rate": 3.901026915058665e-05, "loss": 6.9888, "step": 17160 }, { "epoch": 0.19344262295081968, "grad_norm": 0.10585814714431763, "learning_rate": 3.898639990335942e-05, "loss": 7.0059, "step": 17170 }, { "epoch": 0.19369482976040353, "grad_norm": 0.08816098421812057, "learning_rate": 3.8962524404932134e-05, "loss": 6.9997, "step": 17180 }, { "epoch": 0.19394703656998738, "grad_norm": 0.09418530017137527, "learning_rate": 3.893864267191329e-05, "loss": 6.9965, "step": 17190 }, { "epoch": 0.19419924337957126, "grad_norm": 0.10294044762849808, "learning_rate": 3.891475472091571e-05, "loss": 6.9982, "step": 17200 }, { "epoch": 0.1944514501891551, "grad_norm": 0.09774308651685715, "learning_rate": 3.889086056855654e-05, "loss": 6.9986, "step": 17210 }, { "epoch": 0.19470365699873896, "grad_norm": 0.08706880360841751, "learning_rate": 3.886696023145726e-05, "loss": 6.9942, "step": 17220 }, { "epoch": 0.19495586380832283, "grad_norm": 0.07973400503396988, "learning_rate": 3.884305372624362e-05, "loss": 6.9884, "step": 17230 }, { "epoch": 0.19520807061790668, "grad_norm": 0.10493867844343185, "learning_rate": 3.881914106954568e-05, "loss": 6.9963, "step": 17240 }, { "epoch": 0.19546027742749053, "grad_norm": 0.10324638336896896, "learning_rate": 3.879522227799778e-05, "loss": 7.0059, "step": 17250 }, { "epoch": 0.1957124842370744, "grad_norm": 0.10094523429870605, "learning_rate": 3.877129736823853e-05, "loss": 7.0014, "step": 17260 }, { "epoch": 0.19596469104665826, "grad_norm": 0.10411650687456131, "learning_rate": 3.8747366356910785e-05, "loss": 7.0028, "step": 17270 }, { "epoch": 0.1962168978562421, "grad_norm": 0.1058913916349411, "learning_rate": 3.872342926066165e-05, "loss": 7.0035, "step": 17280 }, { "epoch": 0.19646910466582598, "grad_norm": 0.10188938677310944, "learning_rate": 3.8699486096142454e-05, "loss": 7.0008, "step": 17290 }, { "epoch": 0.19672131147540983, "grad_norm": 0.10572533309459686, "learning_rate": 3.867553688000877e-05, "loss": 7.001, "step": 17300 }, { "epoch": 0.19697351828499368, "grad_norm": 0.09524282813072205, "learning_rate": 3.8651581628920346e-05, "loss": 7.0016, "step": 17310 }, { "epoch": 0.19722572509457756, "grad_norm": 0.08496283739805222, "learning_rate": 3.862762035954118e-05, "loss": 6.9937, "step": 17320 }, { "epoch": 0.1974779319041614, "grad_norm": 0.09662919491529465, "learning_rate": 3.8603653088539394e-05, "loss": 6.9903, "step": 17330 }, { "epoch": 0.19773013871374528, "grad_norm": 0.08805279433727264, "learning_rate": 3.857967983258733e-05, "loss": 6.9934, "step": 17340 }, { "epoch": 0.19798234552332913, "grad_norm": 0.11115919053554535, "learning_rate": 3.85557006083615e-05, "loss": 6.9995, "step": 17350 }, { "epoch": 0.19823455233291298, "grad_norm": 0.10638372600078583, "learning_rate": 3.85317154325425e-05, "loss": 7.004, "step": 17360 }, { "epoch": 0.19848675914249686, "grad_norm": 0.10136479884386063, "learning_rate": 3.850772432181514e-05, "loss": 7.0011, "step": 17370 }, { "epoch": 0.1987389659520807, "grad_norm": 0.10230688750743866, "learning_rate": 3.8483727292868336e-05, "loss": 7.0085, "step": 17380 }, { "epoch": 0.19899117276166456, "grad_norm": 0.09686612337827682, "learning_rate": 3.845972436239511e-05, "loss": 7.0043, "step": 17390 }, { "epoch": 0.19924337957124844, "grad_norm": 0.09073305875062943, "learning_rate": 3.8435715547092593e-05, "loss": 7.0037, "step": 17400 }, { "epoch": 0.19949558638083228, "grad_norm": 0.09950360655784607, "learning_rate": 3.841170086366202e-05, "loss": 7.0008, "step": 17410 }, { "epoch": 0.19974779319041613, "grad_norm": 0.10727016627788544, "learning_rate": 3.838768032880868e-05, "loss": 7.0012, "step": 17420 }, { "epoch": 0.2, "grad_norm": 0.09694778174161911, "learning_rate": 3.8363653959241975e-05, "loss": 6.9981, "step": 17430 }, { "epoch": 0.20025220680958386, "grad_norm": 0.10300920903682709, "learning_rate": 3.833962177167534e-05, "loss": 7.0042, "step": 17440 }, { "epoch": 0.2005044136191677, "grad_norm": 0.09202653914690018, "learning_rate": 3.831558378282627e-05, "loss": 6.9979, "step": 17450 }, { "epoch": 0.2007566204287516, "grad_norm": 0.1019822508096695, "learning_rate": 3.829154000941625e-05, "loss": 7.0019, "step": 17460 }, { "epoch": 0.20100882723833544, "grad_norm": 0.10082893818616867, "learning_rate": 3.826749046817086e-05, "loss": 7.0004, "step": 17470 }, { "epoch": 0.20126103404791928, "grad_norm": 0.10760249197483063, "learning_rate": 3.8243435175819655e-05, "loss": 7.0055, "step": 17480 }, { "epoch": 0.20151324085750316, "grad_norm": 0.09955215454101562, "learning_rate": 3.821937414909617e-05, "loss": 7.0044, "step": 17490 }, { "epoch": 0.201765447667087, "grad_norm": 0.09782486408948898, "learning_rate": 3.8195307404737966e-05, "loss": 7.001, "step": 17500 }, { "epoch": 0.20201765447667086, "grad_norm": 0.10144871473312378, "learning_rate": 3.817123495948658e-05, "loss": 7.0003, "step": 17510 }, { "epoch": 0.20226986128625474, "grad_norm": 0.10536039620637894, "learning_rate": 3.814715683008747e-05, "loss": 7.0064, "step": 17520 }, { "epoch": 0.2025220680958386, "grad_norm": 0.09587492048740387, "learning_rate": 3.8123073033290116e-05, "loss": 7.0062, "step": 17530 }, { "epoch": 0.20277427490542244, "grad_norm": 0.10279477387666702, "learning_rate": 3.809898358584788e-05, "loss": 7.0013, "step": 17540 }, { "epoch": 0.2030264817150063, "grad_norm": 0.08011036366224289, "learning_rate": 3.807488850451808e-05, "loss": 6.9903, "step": 17550 }, { "epoch": 0.20327868852459016, "grad_norm": 0.0920015275478363, "learning_rate": 3.805078780606198e-05, "loss": 6.9969, "step": 17560 }, { "epoch": 0.203530895334174, "grad_norm": 0.1004965752363205, "learning_rate": 3.802668150724469e-05, "loss": 6.9984, "step": 17570 }, { "epoch": 0.2037831021437579, "grad_norm": 0.10500956326723099, "learning_rate": 3.800256962483527e-05, "loss": 7.0047, "step": 17580 }, { "epoch": 0.20403530895334174, "grad_norm": 0.11165912449359894, "learning_rate": 3.7978452175606645e-05, "loss": 7.0073, "step": 17590 }, { "epoch": 0.2042875157629256, "grad_norm": 0.09071670472621918, "learning_rate": 3.795432917633559e-05, "loss": 6.9981, "step": 17600 }, { "epoch": 0.20453972257250946, "grad_norm": 0.0762174054980278, "learning_rate": 3.793020064380279e-05, "loss": 6.9874, "step": 17610 }, { "epoch": 0.2047919293820933, "grad_norm": 0.08393383026123047, "learning_rate": 3.790606659479273e-05, "loss": 6.9847, "step": 17620 }, { "epoch": 0.20504413619167716, "grad_norm": 0.08353477716445923, "learning_rate": 3.788192704609375e-05, "loss": 6.9876, "step": 17630 }, { "epoch": 0.20529634300126104, "grad_norm": 0.08373138308525085, "learning_rate": 3.7857782014498046e-05, "loss": 6.9936, "step": 17640 }, { "epoch": 0.2055485498108449, "grad_norm": 0.07880168408155441, "learning_rate": 3.783363151680157e-05, "loss": 6.9894, "step": 17650 }, { "epoch": 0.20580075662042877, "grad_norm": 0.09982962161302567, "learning_rate": 3.7809475569804135e-05, "loss": 6.9941, "step": 17660 }, { "epoch": 0.20605296343001261, "grad_norm": 0.1061861664056778, "learning_rate": 3.77853141903093e-05, "loss": 7.0053, "step": 17670 }, { "epoch": 0.20630517023959646, "grad_norm": 0.10500431805849075, "learning_rate": 3.7761147395124426e-05, "loss": 7.0049, "step": 17680 }, { "epoch": 0.20655737704918034, "grad_norm": 0.09699840098619461, "learning_rate": 3.773697520106063e-05, "loss": 7.0027, "step": 17690 }, { "epoch": 0.2068095838587642, "grad_norm": 0.1009097769856453, "learning_rate": 3.77127976249328e-05, "loss": 7.0019, "step": 17700 }, { "epoch": 0.20706179066834804, "grad_norm": 0.10079295933246613, "learning_rate": 3.7688614683559555e-05, "loss": 7.0008, "step": 17710 }, { "epoch": 0.20731399747793192, "grad_norm": 0.09272097796201706, "learning_rate": 3.766442639376325e-05, "loss": 7.0015, "step": 17720 }, { "epoch": 0.20756620428751577, "grad_norm": 0.09579066932201385, "learning_rate": 3.7640232772369954e-05, "loss": 6.9973, "step": 17730 }, { "epoch": 0.20781841109709961, "grad_norm": 0.08658523112535477, "learning_rate": 3.7616033836209456e-05, "loss": 6.9942, "step": 17740 }, { "epoch": 0.2080706179066835, "grad_norm": 0.09000244736671448, "learning_rate": 3.759182960211523e-05, "loss": 6.994, "step": 17750 }, { "epoch": 0.20832282471626734, "grad_norm": 0.08459692448377609, "learning_rate": 3.7567620086924456e-05, "loss": 6.9934, "step": 17760 }, { "epoch": 0.2085750315258512, "grad_norm": 0.08988776803016663, "learning_rate": 3.754340530747796e-05, "loss": 6.9971, "step": 17770 }, { "epoch": 0.20882723833543507, "grad_norm": 0.0928448960185051, "learning_rate": 3.7519185280620244e-05, "loss": 6.9938, "step": 17780 }, { "epoch": 0.20907944514501892, "grad_norm": 0.09614650905132294, "learning_rate": 3.749496002319948e-05, "loss": 6.9964, "step": 17790 }, { "epoch": 0.20933165195460277, "grad_norm": 0.10001660138368607, "learning_rate": 3.747072955206744e-05, "loss": 7.0, "step": 17800 }, { "epoch": 0.20958385876418664, "grad_norm": 0.09263212233781815, "learning_rate": 3.744649388407955e-05, "loss": 6.995, "step": 17810 }, { "epoch": 0.2098360655737705, "grad_norm": 0.09688743948936462, "learning_rate": 3.742225303609482e-05, "loss": 6.9957, "step": 17820 }, { "epoch": 0.21008827238335434, "grad_norm": 0.10155510902404785, "learning_rate": 3.739800702497592e-05, "loss": 6.9968, "step": 17830 }, { "epoch": 0.21034047919293822, "grad_norm": 0.08783816546201706, "learning_rate": 3.737375586758907e-05, "loss": 6.9896, "step": 17840 }, { "epoch": 0.21059268600252207, "grad_norm": 0.0944366529583931, "learning_rate": 3.734949958080406e-05, "loss": 7.0004, "step": 17850 }, { "epoch": 0.21084489281210592, "grad_norm": 0.10054390877485275, "learning_rate": 3.732523818149427e-05, "loss": 7.0016, "step": 17860 }, { "epoch": 0.2110970996216898, "grad_norm": 0.08984647691249847, "learning_rate": 3.7300971686536644e-05, "loss": 7.0013, "step": 17870 }, { "epoch": 0.21134930643127364, "grad_norm": 0.09222399443387985, "learning_rate": 3.7276700112811644e-05, "loss": 6.9954, "step": 17880 }, { "epoch": 0.2116015132408575, "grad_norm": 0.10111714899539948, "learning_rate": 3.72524234772033e-05, "loss": 7.0031, "step": 17890 }, { "epoch": 0.21185372005044137, "grad_norm": 0.10637437552213669, "learning_rate": 3.722814179659912e-05, "loss": 7.0039, "step": 17900 }, { "epoch": 0.21210592686002522, "grad_norm": 0.10143065452575684, "learning_rate": 3.720385508789015e-05, "loss": 7.0014, "step": 17910 }, { "epoch": 0.21235813366960907, "grad_norm": 0.09935248643159866, "learning_rate": 3.717956336797096e-05, "loss": 7.003, "step": 17920 }, { "epoch": 0.21261034047919294, "grad_norm": 0.09793975949287415, "learning_rate": 3.715526665373952e-05, "loss": 6.9962, "step": 17930 }, { "epoch": 0.2128625472887768, "grad_norm": 0.0891643688082695, "learning_rate": 3.713096496209737e-05, "loss": 6.9952, "step": 17940 }, { "epoch": 0.21311475409836064, "grad_norm": 0.10526090860366821, "learning_rate": 3.710665830994945e-05, "loss": 7.002, "step": 17950 }, { "epoch": 0.21336696090794452, "grad_norm": 0.09598386287689209, "learning_rate": 3.7082346714204176e-05, "loss": 7.0022, "step": 17960 }, { "epoch": 0.21361916771752837, "grad_norm": 0.09996975213289261, "learning_rate": 3.70580301917734e-05, "loss": 7.0042, "step": 17970 }, { "epoch": 0.21387137452711225, "grad_norm": 0.0909806489944458, "learning_rate": 3.70337087595724e-05, "loss": 7.005, "step": 17980 }, { "epoch": 0.2141235813366961, "grad_norm": 0.09485196322202682, "learning_rate": 3.700938243451986e-05, "loss": 6.9996, "step": 17990 }, { "epoch": 0.21437578814627994, "grad_norm": 0.09084349870681763, "learning_rate": 3.69850512335379e-05, "loss": 6.9934, "step": 18000 }, { "epoch": 0.21462799495586382, "grad_norm": 0.10269545018672943, "learning_rate": 3.6960715173551977e-05, "loss": 7.0038, "step": 18010 }, { "epoch": 0.21488020176544767, "grad_norm": 0.09692174196243286, "learning_rate": 3.6936374271490976e-05, "loss": 7.0042, "step": 18020 }, { "epoch": 0.21513240857503152, "grad_norm": 0.08914893120527267, "learning_rate": 3.691202854428713e-05, "loss": 6.9948, "step": 18030 }, { "epoch": 0.2153846153846154, "grad_norm": 0.08659535646438599, "learning_rate": 3.688767800887603e-05, "loss": 6.9917, "step": 18040 }, { "epoch": 0.21563682219419925, "grad_norm": 0.09499354660511017, "learning_rate": 3.686332268219662e-05, "loss": 6.9913, "step": 18050 }, { "epoch": 0.2158890290037831, "grad_norm": 0.08821921795606613, "learning_rate": 3.683896258119117e-05, "loss": 6.9936, "step": 18060 }, { "epoch": 0.21614123581336697, "grad_norm": 0.08856159448623657, "learning_rate": 3.681459772280527e-05, "loss": 6.9984, "step": 18070 }, { "epoch": 0.21639344262295082, "grad_norm": 0.09344856441020966, "learning_rate": 3.679022812398781e-05, "loss": 6.9952, "step": 18080 }, { "epoch": 0.21664564943253467, "grad_norm": 0.09430032223463058, "learning_rate": 3.6765853801691014e-05, "loss": 6.9952, "step": 18090 }, { "epoch": 0.21689785624211855, "grad_norm": 0.0964246392250061, "learning_rate": 3.674147477287035e-05, "loss": 6.9981, "step": 18100 }, { "epoch": 0.2171500630517024, "grad_norm": 0.08739034831523895, "learning_rate": 3.6717091054484564e-05, "loss": 6.9967, "step": 18110 }, { "epoch": 0.21740226986128625, "grad_norm": 0.09300383925437927, "learning_rate": 3.669270266349571e-05, "loss": 6.9968, "step": 18120 }, { "epoch": 0.21765447667087012, "grad_norm": 0.09953509271144867, "learning_rate": 3.6668309616869024e-05, "loss": 6.9992, "step": 18130 }, { "epoch": 0.21790668348045397, "grad_norm": 0.10223366320133209, "learning_rate": 3.6643911931573036e-05, "loss": 6.9988, "step": 18140 }, { "epoch": 0.21815889029003782, "grad_norm": 0.09389568120241165, "learning_rate": 3.661950962457948e-05, "loss": 6.9994, "step": 18150 }, { "epoch": 0.2184110970996217, "grad_norm": 0.08565200865268707, "learning_rate": 3.659510271286331e-05, "loss": 6.9891, "step": 18160 }, { "epoch": 0.21866330390920555, "grad_norm": 0.07862073928117752, "learning_rate": 3.657069121340267e-05, "loss": 6.9865, "step": 18170 }, { "epoch": 0.2189155107187894, "grad_norm": 0.07920432835817337, "learning_rate": 3.654627514317892e-05, "loss": 6.9863, "step": 18180 }, { "epoch": 0.21916771752837327, "grad_norm": 0.0742906853556633, "learning_rate": 3.652185451917657e-05, "loss": 6.9854, "step": 18190 }, { "epoch": 0.21941992433795712, "grad_norm": 0.0920514315366745, "learning_rate": 3.6497429358383326e-05, "loss": 6.9882, "step": 18200 }, { "epoch": 0.21967213114754097, "grad_norm": 0.09322722256183624, "learning_rate": 3.647299967779004e-05, "loss": 6.9975, "step": 18210 }, { "epoch": 0.21992433795712485, "grad_norm": 0.09846273064613342, "learning_rate": 3.644856549439068e-05, "loss": 6.9994, "step": 18220 }, { "epoch": 0.2201765447667087, "grad_norm": 0.09510577470064163, "learning_rate": 3.6424126825182404e-05, "loss": 7.0009, "step": 18230 }, { "epoch": 0.22042875157629255, "grad_norm": 0.0957375019788742, "learning_rate": 3.6399683687165436e-05, "loss": 6.9972, "step": 18240 }, { "epoch": 0.22068095838587642, "grad_norm": 0.09501732885837555, "learning_rate": 3.637523609734314e-05, "loss": 6.9965, "step": 18250 }, { "epoch": 0.22093316519546027, "grad_norm": 0.09040188044309616, "learning_rate": 3.6350784072721974e-05, "loss": 6.9988, "step": 18260 }, { "epoch": 0.22118537200504412, "grad_norm": 0.09907429665327072, "learning_rate": 3.6326327630311455e-05, "loss": 6.9952, "step": 18270 }, { "epoch": 0.221437578814628, "grad_norm": 0.09914768487215042, "learning_rate": 3.6301866787124224e-05, "loss": 7.0011, "step": 18280 }, { "epoch": 0.22168978562421185, "grad_norm": 0.0954091027379036, "learning_rate": 3.627740156017591e-05, "loss": 6.9993, "step": 18290 }, { "epoch": 0.22194199243379573, "grad_norm": 0.09900614619255066, "learning_rate": 3.625293196648527e-05, "loss": 6.9986, "step": 18300 }, { "epoch": 0.22219419924337958, "grad_norm": 0.09949107468128204, "learning_rate": 3.622845802307405e-05, "loss": 7.0004, "step": 18310 }, { "epoch": 0.22244640605296342, "grad_norm": 0.09019306302070618, "learning_rate": 3.620397974696702e-05, "loss": 6.9972, "step": 18320 }, { "epoch": 0.2226986128625473, "grad_norm": 0.09210684895515442, "learning_rate": 3.6179497155192e-05, "loss": 6.997, "step": 18330 }, { "epoch": 0.22295081967213115, "grad_norm": 0.08574312925338745, "learning_rate": 3.6155010264779785e-05, "loss": 6.995, "step": 18340 }, { "epoch": 0.223203026481715, "grad_norm": 0.08933115750551224, "learning_rate": 3.613051909276414e-05, "loss": 6.9949, "step": 18350 }, { "epoch": 0.22345523329129888, "grad_norm": 0.08677487820386887, "learning_rate": 3.6106023656181875e-05, "loss": 6.9955, "step": 18360 }, { "epoch": 0.22370744010088273, "grad_norm": 0.0888250395655632, "learning_rate": 3.608152397207269e-05, "loss": 6.9961, "step": 18370 }, { "epoch": 0.22395964691046658, "grad_norm": 0.08227866142988205, "learning_rate": 3.605702005747929e-05, "loss": 6.9944, "step": 18380 }, { "epoch": 0.22421185372005045, "grad_norm": 0.0750117376446724, "learning_rate": 3.603251192944731e-05, "loss": 6.9824, "step": 18390 }, { "epoch": 0.2244640605296343, "grad_norm": 0.08272726088762283, "learning_rate": 3.60079996050253e-05, "loss": 6.9842, "step": 18400 }, { "epoch": 0.22471626733921815, "grad_norm": 0.08886688947677612, "learning_rate": 3.598348310126477e-05, "loss": 6.9894, "step": 18410 }, { "epoch": 0.22496847414880203, "grad_norm": 0.09581403434276581, "learning_rate": 3.5958962435220086e-05, "loss": 6.993, "step": 18420 }, { "epoch": 0.22522068095838588, "grad_norm": 0.10012991726398468, "learning_rate": 3.593443762394853e-05, "loss": 6.995, "step": 18430 }, { "epoch": 0.22547288776796973, "grad_norm": 0.09888409823179245, "learning_rate": 3.59099086845103e-05, "loss": 6.9976, "step": 18440 }, { "epoch": 0.2257250945775536, "grad_norm": 0.09590132534503937, "learning_rate": 3.58853756339684e-05, "loss": 6.9976, "step": 18450 }, { "epoch": 0.22597730138713745, "grad_norm": 0.08815895020961761, "learning_rate": 3.586083848938876e-05, "loss": 6.9977, "step": 18460 }, { "epoch": 0.2262295081967213, "grad_norm": 0.09233537316322327, "learning_rate": 3.583629726784012e-05, "loss": 7.0003, "step": 18470 }, { "epoch": 0.22648171500630518, "grad_norm": 0.08305177837610245, "learning_rate": 3.581175198639405e-05, "loss": 6.9982, "step": 18480 }, { "epoch": 0.22673392181588903, "grad_norm": 0.08441679924726486, "learning_rate": 3.5787202662124985e-05, "loss": 6.9957, "step": 18490 }, { "epoch": 0.22698612862547288, "grad_norm": 0.08643144369125366, "learning_rate": 3.5762649312110116e-05, "loss": 6.9934, "step": 18500 }, { "epoch": 0.22723833543505675, "grad_norm": 0.08519355952739716, "learning_rate": 3.573809195342949e-05, "loss": 6.9931, "step": 18510 }, { "epoch": 0.2274905422446406, "grad_norm": 0.09477400034666061, "learning_rate": 3.5713530603165905e-05, "loss": 6.9911, "step": 18520 }, { "epoch": 0.22774274905422445, "grad_norm": 0.08996640145778656, "learning_rate": 3.5688965278404945e-05, "loss": 6.9951, "step": 18530 }, { "epoch": 0.22799495586380833, "grad_norm": 0.09412266314029694, "learning_rate": 3.566439599623497e-05, "loss": 6.9962, "step": 18540 }, { "epoch": 0.22824716267339218, "grad_norm": 0.09088553488254547, "learning_rate": 3.563982277374708e-05, "loss": 6.9974, "step": 18550 }, { "epoch": 0.22849936948297603, "grad_norm": 0.09432966262102127, "learning_rate": 3.5615245628035114e-05, "loss": 7.0014, "step": 18560 }, { "epoch": 0.2287515762925599, "grad_norm": 0.09372837841510773, "learning_rate": 3.559066457619566e-05, "loss": 6.9971, "step": 18570 }, { "epoch": 0.22900378310214375, "grad_norm": 0.08954507857561111, "learning_rate": 3.5566079635328007e-05, "loss": 6.996, "step": 18580 }, { "epoch": 0.2292559899117276, "grad_norm": 0.09833987057209015, "learning_rate": 3.5541490822534137e-05, "loss": 7.0027, "step": 18590 }, { "epoch": 0.22950819672131148, "grad_norm": 0.08957725018262863, "learning_rate": 3.5516898154918755e-05, "loss": 6.9962, "step": 18600 }, { "epoch": 0.22976040353089533, "grad_norm": 0.07657445222139359, "learning_rate": 3.549230164958923e-05, "loss": 6.9899, "step": 18610 }, { "epoch": 0.2300126103404792, "grad_norm": 0.09081990271806717, "learning_rate": 3.54677013236556e-05, "loss": 6.9919, "step": 18620 }, { "epoch": 0.23026481715006306, "grad_norm": 0.0863686203956604, "learning_rate": 3.544309719423056e-05, "loss": 6.9917, "step": 18630 }, { "epoch": 0.2305170239596469, "grad_norm": 0.0894240215420723, "learning_rate": 3.541848927842947e-05, "loss": 6.9906, "step": 18640 }, { "epoch": 0.23076923076923078, "grad_norm": 0.10034269094467163, "learning_rate": 3.5393877593370294e-05, "loss": 6.9998, "step": 18650 }, { "epoch": 0.23102143757881463, "grad_norm": 0.09650247544050217, "learning_rate": 3.536926215617365e-05, "loss": 7.0018, "step": 18660 }, { "epoch": 0.23127364438839848, "grad_norm": 0.09689777344465256, "learning_rate": 3.534464298396272e-05, "loss": 6.9977, "step": 18670 }, { "epoch": 0.23152585119798236, "grad_norm": 0.0931864082813263, "learning_rate": 3.532002009386335e-05, "loss": 6.9956, "step": 18680 }, { "epoch": 0.2317780580075662, "grad_norm": 0.07920259237289429, "learning_rate": 3.5295393503003904e-05, "loss": 6.9929, "step": 18690 }, { "epoch": 0.23203026481715006, "grad_norm": 0.0866096094250679, "learning_rate": 3.527076322851538e-05, "loss": 6.9852, "step": 18700 }, { "epoch": 0.00025220680958385876, "grad_norm": 0.0959169790148735, "learning_rate": 3.5246129287531284e-05, "loss": 7.003, "step": 18710 }, { "epoch": 0.0005044136191677175, "grad_norm": 0.0937923863530159, "learning_rate": 3.5221491697187706e-05, "loss": 6.9974, "step": 18720 }, { "epoch": 0.0007566204287515763, "grad_norm": 0.09289128333330154, "learning_rate": 3.519685047462328e-05, "loss": 6.9929, "step": 18730 }, { "epoch": 0.001008827238335435, "grad_norm": 0.09861356019973755, "learning_rate": 3.517220563697914e-05, "loss": 6.9958, "step": 18740 }, { "epoch": 0.0012610340479192938, "grad_norm": 0.09064722806215286, "learning_rate": 3.514755720139896e-05, "loss": 6.9966, "step": 18750 }, { "epoch": 0.0015132408575031526, "grad_norm": 0.08849099278450012, "learning_rate": 3.5122905185028895e-05, "loss": 6.993, "step": 18760 }, { "epoch": 0.0017654476670870113, "grad_norm": 0.08377307653427124, "learning_rate": 3.5098249605017605e-05, "loss": 6.9923, "step": 18770 }, { "epoch": 0.00201765447667087, "grad_norm": 0.1003725677728653, "learning_rate": 3.5073590478516236e-05, "loss": 6.9956, "step": 18780 }, { "epoch": 0.002269861286254729, "grad_norm": 0.09185110032558441, "learning_rate": 3.5048927822678375e-05, "loss": 6.9981, "step": 18790 }, { "epoch": 0.0025220680958385876, "grad_norm": 0.07348717004060745, "learning_rate": 3.502426165466009e-05, "loss": 6.984, "step": 18800 }, { "epoch": 0.0027742749054224464, "grad_norm": 0.08164697140455246, "learning_rate": 3.499959199161988e-05, "loss": 6.9844, "step": 18810 }, { "epoch": 0.003026481715006305, "grad_norm": 0.10387170314788818, "learning_rate": 3.497491885071867e-05, "loss": 6.9961, "step": 18820 }, { "epoch": 0.003278688524590164, "grad_norm": 0.10328804701566696, "learning_rate": 3.495024224911982e-05, "loss": 7.0054, "step": 18830 }, { "epoch": 0.0035308953341740227, "grad_norm": 0.10996127128601074, "learning_rate": 3.492556220398908e-05, "loss": 7.0108, "step": 18840 }, { "epoch": 0.0037831021437578815, "grad_norm": 0.09571533650159836, "learning_rate": 3.4900878732494614e-05, "loss": 7.0009, "step": 18850 }, { "epoch": 0.00403530895334174, "grad_norm": 0.08610700815916061, "learning_rate": 3.4876191851806966e-05, "loss": 6.9935, "step": 18860 }, { "epoch": 0.004287515762925599, "grad_norm": 0.08978790789842606, "learning_rate": 3.4851501579099024e-05, "loss": 6.9961, "step": 18870 }, { "epoch": 0.004539722572509458, "grad_norm": 0.09345486760139465, "learning_rate": 3.482680793154607e-05, "loss": 6.9937, "step": 18880 }, { "epoch": 0.0047919293820933165, "grad_norm": 0.06672913581132889, "learning_rate": 3.480211092632572e-05, "loss": 6.9876, "step": 18890 }, { "epoch": 0.005044136191677175, "grad_norm": 0.06758100539445877, "learning_rate": 3.477741058061792e-05, "loss": 6.9761, "step": 18900 }, { "epoch": 0.005296343001261034, "grad_norm": 0.08429968357086182, "learning_rate": 3.475270691160495e-05, "loss": 6.9861, "step": 18910 }, { "epoch": 0.005548549810844893, "grad_norm": 0.09739021956920624, "learning_rate": 3.4727999936471394e-05, "loss": 6.9976, "step": 18920 }, { "epoch": 0.005800756620428752, "grad_norm": 0.08585893362760544, "learning_rate": 3.4703289672404156e-05, "loss": 6.995, "step": 18930 }, { "epoch": 0.00605296343001261, "grad_norm": 0.08660632371902466, "learning_rate": 3.4678576136592396e-05, "loss": 6.9948, "step": 18940 }, { "epoch": 0.006305170239596469, "grad_norm": 0.09272204339504242, "learning_rate": 3.465385934622757e-05, "loss": 6.9952, "step": 18950 }, { "epoch": 0.006557377049180328, "grad_norm": 0.0877639651298523, "learning_rate": 3.4629139318503386e-05, "loss": 6.9941, "step": 18960 }, { "epoch": 0.006809583858764187, "grad_norm": 0.07984507828950882, "learning_rate": 3.4604416070615816e-05, "loss": 6.9837, "step": 18970 }, { "epoch": 0.007061790668348045, "grad_norm": 0.09527283906936646, "learning_rate": 3.4579689619763064e-05, "loss": 6.9891, "step": 18980 }, { "epoch": 0.007313997477931904, "grad_norm": 0.07378727942705154, "learning_rate": 3.455495998314558e-05, "loss": 6.9922, "step": 18990 }, { "epoch": 0.007566204287515763, "grad_norm": 0.08230351656675339, "learning_rate": 3.4530227177966e-05, "loss": 6.9874, "step": 19000 }, { "epoch": 0.007818411097099623, "grad_norm": 0.0953875258564949, "learning_rate": 3.450549122142918e-05, "loss": 6.9946, "step": 19010 }, { "epoch": 0.00807061790668348, "grad_norm": 0.0957489088177681, "learning_rate": 3.448075213074217e-05, "loss": 7.0008, "step": 19020 }, { "epoch": 0.00832282471626734, "grad_norm": 0.08329126238822937, "learning_rate": 3.44560099231142e-05, "loss": 6.996, "step": 19030 }, { "epoch": 0.008575031525851198, "grad_norm": 0.09775462746620178, "learning_rate": 3.443126461575668e-05, "loss": 6.9945, "step": 19040 }, { "epoch": 0.008827238335435058, "grad_norm": 0.09087511897087097, "learning_rate": 3.4406516225883135e-05, "loss": 6.9941, "step": 19050 }, { "epoch": 0.009079445145018916, "grad_norm": 0.10307837277650833, "learning_rate": 3.438176477070929e-05, "loss": 6.9985, "step": 19060 }, { "epoch": 0.009331651954602775, "grad_norm": 0.08986273407936096, "learning_rate": 3.435701026745295e-05, "loss": 6.9998, "step": 19070 }, { "epoch": 0.009583858764186633, "grad_norm": 0.09128424525260925, "learning_rate": 3.4332252733334073e-05, "loss": 6.9933, "step": 19080 }, { "epoch": 0.009836065573770493, "grad_norm": 0.09104502946138382, "learning_rate": 3.430749218557472e-05, "loss": 6.9956, "step": 19090 }, { "epoch": 0.01008827238335435, "grad_norm": 0.09482593089342117, "learning_rate": 3.428272864139904e-05, "loss": 6.9967, "step": 19100 }, { "epoch": 0.01034047919293821, "grad_norm": 0.09279388934373856, "learning_rate": 3.425796211803328e-05, "loss": 6.9992, "step": 19110 }, { "epoch": 0.010592686002522068, "grad_norm": 0.09486061334609985, "learning_rate": 3.423319263270575e-05, "loss": 6.9997, "step": 19120 }, { "epoch": 0.010844892812105928, "grad_norm": 0.08856762945652008, "learning_rate": 3.420842020264681e-05, "loss": 6.9976, "step": 19130 }, { "epoch": 0.011097099621689786, "grad_norm": 0.08175656199455261, "learning_rate": 3.4183644845088894e-05, "loss": 6.9881, "step": 19140 }, { "epoch": 0.011349306431273645, "grad_norm": 0.08366627246141434, "learning_rate": 3.415886657726644e-05, "loss": 6.9898, "step": 19150 }, { "epoch": 0.011601513240857503, "grad_norm": 0.06878870725631714, "learning_rate": 3.413408541641595e-05, "loss": 6.981, "step": 19160 }, { "epoch": 0.011853720050441363, "grad_norm": 0.09781108796596527, "learning_rate": 3.41093013797759e-05, "loss": 6.996, "step": 19170 }, { "epoch": 0.01210592686002522, "grad_norm": 0.0956212729215622, "learning_rate": 3.408451448458679e-05, "loss": 6.998, "step": 19180 }, { "epoch": 0.01235813366960908, "grad_norm": 0.09311249852180481, "learning_rate": 3.405972474809112e-05, "loss": 6.9998, "step": 19190 }, { "epoch": 0.012610340479192938, "grad_norm": 0.10111825913190842, "learning_rate": 3.4034932187533314e-05, "loss": 6.9975, "step": 19200 }, { "epoch": 0.012862547288776798, "grad_norm": 0.0993218719959259, "learning_rate": 3.401013682015982e-05, "loss": 6.9996, "step": 19210 }, { "epoch": 0.013114754098360656, "grad_norm": 0.09346321225166321, "learning_rate": 3.398533866321901e-05, "loss": 6.9963, "step": 19220 }, { "epoch": 0.013366960907944515, "grad_norm": 0.08527230471372604, "learning_rate": 3.396053773396119e-05, "loss": 6.9906, "step": 19230 }, { "epoch": 0.013619167717528373, "grad_norm": 0.06494206935167313, "learning_rate": 3.3935734049638605e-05, "loss": 6.9753, "step": 19240 }, { "epoch": 0.013871374527112233, "grad_norm": 0.07088220119476318, "learning_rate": 3.391092762750544e-05, "loss": 6.9764, "step": 19250 }, { "epoch": 0.01412358133669609, "grad_norm": 0.10177773237228394, "learning_rate": 3.388611848481773e-05, "loss": 6.9847, "step": 19260 }, { "epoch": 0.01437578814627995, "grad_norm": 0.09826643019914627, "learning_rate": 3.386130663883346e-05, "loss": 7.0017, "step": 19270 }, { "epoch": 0.014627994955863808, "grad_norm": 0.0907476618885994, "learning_rate": 3.3836492106812446e-05, "loss": 6.9983, "step": 19280 }, { "epoch": 0.014880201765447668, "grad_norm": 0.10719508677721024, "learning_rate": 3.38116749060164e-05, "loss": 7.0029, "step": 19290 }, { "epoch": 0.015132408575031526, "grad_norm": 0.10429894924163818, "learning_rate": 3.3786855053708906e-05, "loss": 7.0075, "step": 19300 }, { "epoch": 0.015384615384615385, "grad_norm": 0.0975504219532013, "learning_rate": 3.3762032567155346e-05, "loss": 6.9965, "step": 19310 }, { "epoch": 0.015636822194199245, "grad_norm": 0.08586429059505463, "learning_rate": 3.3737207463622974e-05, "loss": 6.9999, "step": 19320 }, { "epoch": 0.0158890290037831, "grad_norm": 0.09641449898481369, "learning_rate": 3.3712379760380855e-05, "loss": 6.9962, "step": 19330 }, { "epoch": 0.01614123581336696, "grad_norm": 0.09443552047014236, "learning_rate": 3.368754947469984e-05, "loss": 7.0001, "step": 19340 }, { "epoch": 0.01639344262295082, "grad_norm": 0.08852846175432205, "learning_rate": 3.3662716623852616e-05, "loss": 6.9956, "step": 19350 }, { "epoch": 0.01664564943253468, "grad_norm": 0.09017421305179596, "learning_rate": 3.3637881225113615e-05, "loss": 6.9881, "step": 19360 }, { "epoch": 0.016897856242118536, "grad_norm": 0.08561709523200989, "learning_rate": 3.3613043295759066e-05, "loss": 6.991, "step": 19370 }, { "epoch": 0.017150063051702396, "grad_norm": 0.08288823813199997, "learning_rate": 3.358820285306696e-05, "loss": 6.9916, "step": 19380 }, { "epoch": 0.017402269861286256, "grad_norm": 0.08458128571510315, "learning_rate": 3.356335991431702e-05, "loss": 6.9926, "step": 19390 }, { "epoch": 0.017654476670870115, "grad_norm": 0.07911253720521927, "learning_rate": 3.353851449679073e-05, "loss": 6.9896, "step": 19400 }, { "epoch": 0.01790668348045397, "grad_norm": 0.06938501447439194, "learning_rate": 3.351366661777126e-05, "loss": 6.9901, "step": 19410 }, { "epoch": 0.01815889029003783, "grad_norm": 0.07854857295751572, "learning_rate": 3.348881629454352e-05, "loss": 6.9875, "step": 19420 }, { "epoch": 0.01841109709962169, "grad_norm": 0.08634786307811737, "learning_rate": 3.3463963544394115e-05, "loss": 6.9909, "step": 19430 }, { "epoch": 0.01866330390920555, "grad_norm": 0.09552252292633057, "learning_rate": 3.3439108384611354e-05, "loss": 6.9977, "step": 19440 }, { "epoch": 0.018915510718789406, "grad_norm": 0.09214898943901062, "learning_rate": 3.341425083248518e-05, "loss": 6.9983, "step": 19450 }, { "epoch": 0.019167717528373266, "grad_norm": 0.09754513949155807, "learning_rate": 3.3389390905307244e-05, "loss": 6.9985, "step": 19460 }, { "epoch": 0.019419924337957126, "grad_norm": 0.09329919517040253, "learning_rate": 3.3364528620370826e-05, "loss": 6.9986, "step": 19470 }, { "epoch": 0.019672131147540985, "grad_norm": 0.08200576901435852, "learning_rate": 3.333966399497085e-05, "loss": 6.987, "step": 19480 }, { "epoch": 0.01992433795712484, "grad_norm": 0.0906553566455841, "learning_rate": 3.331479704640387e-05, "loss": 6.9942, "step": 19490 }, { "epoch": 0.0201765447667087, "grad_norm": 0.09682202339172363, "learning_rate": 3.3289927791968055e-05, "loss": 6.9963, "step": 19500 }, { "epoch": 0.02042875157629256, "grad_norm": 0.07031402736902237, "learning_rate": 3.326505624896319e-05, "loss": 6.9869, "step": 19510 }, { "epoch": 0.02068095838587642, "grad_norm": 0.062219031155109406, "learning_rate": 3.3240182434690625e-05, "loss": 6.9793, "step": 19520 }, { "epoch": 0.020933165195460277, "grad_norm": 0.08089980483055115, "learning_rate": 3.321530636645331e-05, "loss": 6.9773, "step": 19530 }, { "epoch": 0.021185372005044136, "grad_norm": 0.07161598652601242, "learning_rate": 3.3190428061555765e-05, "loss": 6.9788, "step": 19540 }, { "epoch": 0.021437578814627996, "grad_norm": 0.08733541518449783, "learning_rate": 3.316554753730405e-05, "loss": 6.9947, "step": 19550 }, { "epoch": 0.021689785624211855, "grad_norm": 0.0922471210360527, "learning_rate": 3.314066481100579e-05, "loss": 6.9929, "step": 19560 }, { "epoch": 0.02194199243379571, "grad_norm": 0.08640459179878235, "learning_rate": 3.3115779899970116e-05, "loss": 6.9889, "step": 19570 }, { "epoch": 0.02219419924337957, "grad_norm": 0.0869559720158577, "learning_rate": 3.30908928215077e-05, "loss": 6.9924, "step": 19580 }, { "epoch": 0.02244640605296343, "grad_norm": 0.08313434571027756, "learning_rate": 3.306600359293072e-05, "loss": 6.9926, "step": 19590 }, { "epoch": 0.02269861286254729, "grad_norm": 0.08985769003629684, "learning_rate": 3.304111223155284e-05, "loss": 6.9945, "step": 19600 }, { "epoch": 0.022950819672131147, "grad_norm": 0.0769045427441597, "learning_rate": 3.3016218754689205e-05, "loss": 6.988, "step": 19610 }, { "epoch": 0.023203026481715006, "grad_norm": 0.08353368937969208, "learning_rate": 3.299132317965645e-05, "loss": 6.9906, "step": 19620 }, { "epoch": 0.023455233291298866, "grad_norm": 0.09156104922294617, "learning_rate": 3.296642552377264e-05, "loss": 6.992, "step": 19630 }, { "epoch": 0.023707440100882726, "grad_norm": 0.08017124980688095, "learning_rate": 3.294152580435733e-05, "loss": 6.9886, "step": 19640 }, { "epoch": 0.02395964691046658, "grad_norm": 0.07892077416181564, "learning_rate": 3.291662403873147e-05, "loss": 6.9848, "step": 19650 }, { "epoch": 0.02421185372005044, "grad_norm": 0.08099817484617233, "learning_rate": 3.289172024421746e-05, "loss": 6.982, "step": 19660 }, { "epoch": 0.0244640605296343, "grad_norm": 0.09617035835981369, "learning_rate": 3.286681443813908e-05, "loss": 6.9923, "step": 19670 }, { "epoch": 0.02471626733921816, "grad_norm": 0.09589581936597824, "learning_rate": 3.284190663782156e-05, "loss": 6.9978, "step": 19680 }, { "epoch": 0.024968474148802017, "grad_norm": 0.09859813004732132, "learning_rate": 3.281699686059147e-05, "loss": 7.0003, "step": 19690 }, { "epoch": 0.025220680958385876, "grad_norm": 0.09635759145021439, "learning_rate": 3.279208512377677e-05, "loss": 6.9988, "step": 19700 }, { "epoch": 0.025472887767969736, "grad_norm": 0.09155570715665817, "learning_rate": 3.276717144470678e-05, "loss": 6.9962, "step": 19710 }, { "epoch": 0.025725094577553596, "grad_norm": 0.07683449238538742, "learning_rate": 3.27422558407122e-05, "loss": 6.9971, "step": 19720 }, { "epoch": 0.025977301387137452, "grad_norm": 0.07100699096918106, "learning_rate": 3.271733832912502e-05, "loss": 6.9767, "step": 19730 }, { "epoch": 0.02622950819672131, "grad_norm": 0.06905706971883774, "learning_rate": 3.269241892727861e-05, "loss": 6.976, "step": 19740 }, { "epoch": 0.02648171500630517, "grad_norm": 0.0789913609623909, "learning_rate": 3.26674976525076e-05, "loss": 6.9814, "step": 19750 }, { "epoch": 0.02673392181588903, "grad_norm": 0.09082896262407303, "learning_rate": 3.2642574522147965e-05, "loss": 6.9891, "step": 19760 }, { "epoch": 0.026986128625472887, "grad_norm": 0.08379251509904861, "learning_rate": 3.261764955353696e-05, "loss": 6.9905, "step": 19770 }, { "epoch": 0.027238335435056747, "grad_norm": 0.0770147368311882, "learning_rate": 3.2592722764013104e-05, "loss": 6.9921, "step": 19780 }, { "epoch": 0.027490542244640606, "grad_norm": 0.06889230757951736, "learning_rate": 3.2567794170916206e-05, "loss": 6.9858, "step": 19790 }, { "epoch": 0.027742749054224466, "grad_norm": 0.06057557091116905, "learning_rate": 3.2542863791587304e-05, "loss": 6.9672, "step": 19800 }, { "epoch": 0.027994955863808322, "grad_norm": 0.06999384611845016, "learning_rate": 3.251793164336871e-05, "loss": 6.9754, "step": 19810 }, { "epoch": 0.02824716267339218, "grad_norm": 0.08295650035142899, "learning_rate": 3.249299774360393e-05, "loss": 6.9814, "step": 19820 }, { "epoch": 0.02849936948297604, "grad_norm": 0.09009753912687302, "learning_rate": 3.2468062109637706e-05, "loss": 6.9924, "step": 19830 }, { "epoch": 0.0287515762925599, "grad_norm": 0.09401380270719528, "learning_rate": 3.2443124758816004e-05, "loss": 6.9972, "step": 19840 }, { "epoch": 0.029003783102143757, "grad_norm": 0.09720715880393982, "learning_rate": 3.2418185708485957e-05, "loss": 6.9989, "step": 19850 }, { "epoch": 0.029255989911727617, "grad_norm": 0.09027164429426193, "learning_rate": 3.239324497599589e-05, "loss": 6.998, "step": 19860 }, { "epoch": 0.029508196721311476, "grad_norm": 0.09693627804517746, "learning_rate": 3.236830257869531e-05, "loss": 6.9987, "step": 19870 }, { "epoch": 0.029760403530895336, "grad_norm": 0.10260212421417236, "learning_rate": 3.234335853393485e-05, "loss": 7.0054, "step": 19880 }, { "epoch": 0.030012610340479192, "grad_norm": 0.09531942009925842, "learning_rate": 3.231841285906632e-05, "loss": 7.0051, "step": 19890 }, { "epoch": 0.03026481715006305, "grad_norm": 0.11896346509456635, "learning_rate": 3.2293465571442676e-05, "loss": 7.0039, "step": 19900 }, { "epoch": 0.03051702395964691, "grad_norm": 0.09382806718349457, "learning_rate": 3.226851668841793e-05, "loss": 6.9989, "step": 19910 }, { "epoch": 0.03076923076923077, "grad_norm": 0.08629728108644485, "learning_rate": 3.2243566227347286e-05, "loss": 6.9927, "step": 19920 }, { "epoch": 0.031021437578814627, "grad_norm": 0.09542068839073181, "learning_rate": 3.2218614205587e-05, "loss": 6.9915, "step": 19930 }, { "epoch": 0.03127364438839849, "grad_norm": 0.09231430292129517, "learning_rate": 3.2193660640494406e-05, "loss": 6.9921, "step": 19940 }, { "epoch": 0.031525851197982346, "grad_norm": 0.08016908168792725, "learning_rate": 3.216870554942793e-05, "loss": 6.9937, "step": 19950 }, { "epoch": 0.0317780580075662, "grad_norm": 0.08079475909471512, "learning_rate": 3.214374894974706e-05, "loss": 6.9912, "step": 19960 }, { "epoch": 0.032030264817150066, "grad_norm": 0.08727104216814041, "learning_rate": 3.211879085881233e-05, "loss": 6.9912, "step": 19970 }, { "epoch": 0.03228247162673392, "grad_norm": 0.08728489279747009, "learning_rate": 3.209383129398533e-05, "loss": 6.9941, "step": 19980 }, { "epoch": 0.03253467843631778, "grad_norm": 0.08294636756181717, "learning_rate": 3.206887027262861e-05, "loss": 6.988, "step": 19990 }, { "epoch": 0.03278688524590164, "grad_norm": 0.06913056969642639, "learning_rate": 3.204390781210583e-05, "loss": 6.9782, "step": 20000 }, { "epoch": 0.0330390920554855, "grad_norm": 0.0860457569360733, "learning_rate": 3.2018943929781565e-05, "loss": 6.9784, "step": 20010 }, { "epoch": 0.03329129886506936, "grad_norm": 0.08694358170032501, "learning_rate": 3.199397864302142e-05, "loss": 6.9922, "step": 20020 }, { "epoch": 0.033543505674653216, "grad_norm": 0.09476464241743088, "learning_rate": 3.196901196919199e-05, "loss": 6.9918, "step": 20030 }, { "epoch": 0.03379571248423707, "grad_norm": 0.0854196846485138, "learning_rate": 3.194404392566081e-05, "loss": 7.0021, "step": 20040 }, { "epoch": 0.034047919293820936, "grad_norm": 0.0824333056807518, "learning_rate": 3.191907452979637e-05, "loss": 6.992, "step": 20050 }, { "epoch": 0.03430012610340479, "grad_norm": 0.083761066198349, "learning_rate": 3.1894103798968095e-05, "loss": 6.9903, "step": 20060 }, { "epoch": 0.03455233291298865, "grad_norm": 0.09246926754713058, "learning_rate": 3.1869131750546355e-05, "loss": 6.993, "step": 20070 }, { "epoch": 0.03480453972257251, "grad_norm": 0.09609467536211014, "learning_rate": 3.184415840190244e-05, "loss": 6.9992, "step": 20080 }, { "epoch": 0.03505674653215637, "grad_norm": 0.08104599267244339, "learning_rate": 3.1819183770408526e-05, "loss": 6.9904, "step": 20090 }, { "epoch": 0.03530895334174023, "grad_norm": 0.07431627064943314, "learning_rate": 3.179420787343769e-05, "loss": 6.9854, "step": 20100 }, { "epoch": 0.03556116015132409, "grad_norm": 0.0835537239909172, "learning_rate": 3.176923072836389e-05, "loss": 6.989, "step": 20110 }, { "epoch": 0.03581336696090794, "grad_norm": 0.0857275053858757, "learning_rate": 3.1744252352561956e-05, "loss": 6.9915, "step": 20120 }, { "epoch": 0.036065573770491806, "grad_norm": 0.06579935550689697, "learning_rate": 3.171927276340756e-05, "loss": 6.9837, "step": 20130 }, { "epoch": 0.03631778058007566, "grad_norm": 0.06885376572608948, "learning_rate": 3.1694291978277234e-05, "loss": 6.9748, "step": 20140 }, { "epoch": 0.03656998738965952, "grad_norm": 0.07298839092254639, "learning_rate": 3.1669310014548346e-05, "loss": 6.9771, "step": 20150 }, { "epoch": 0.03682219419924338, "grad_norm": 0.0760769173502922, "learning_rate": 3.164432688959906e-05, "loss": 6.9804, "step": 20160 }, { "epoch": 0.03707440100882724, "grad_norm": 0.08400636166334152, "learning_rate": 3.1619342620808356e-05, "loss": 6.9903, "step": 20170 }, { "epoch": 0.0373266078184111, "grad_norm": 0.08379557728767395, "learning_rate": 3.159435722555604e-05, "loss": 6.9925, "step": 20180 }, { "epoch": 0.03757881462799496, "grad_norm": 0.08866714686155319, "learning_rate": 3.156937072122264e-05, "loss": 6.994, "step": 20190 }, { "epoch": 0.03783102143757881, "grad_norm": 0.08675449341535568, "learning_rate": 3.154438312518953e-05, "loss": 6.993, "step": 20200 }, { "epoch": 0.038083228247162676, "grad_norm": 0.09078294038772583, "learning_rate": 3.1519394454838775e-05, "loss": 6.9921, "step": 20210 }, { "epoch": 0.03833543505674653, "grad_norm": 0.07859962433576584, "learning_rate": 3.149440472755323e-05, "loss": 6.9912, "step": 20220 }, { "epoch": 0.03858764186633039, "grad_norm": 0.07959852367639542, "learning_rate": 3.1469413960716474e-05, "loss": 6.9889, "step": 20230 }, { "epoch": 0.03883984867591425, "grad_norm": 0.08509952574968338, "learning_rate": 3.144442217171281e-05, "loss": 6.9915, "step": 20240 }, { "epoch": 0.03909205548549811, "grad_norm": 0.06802200525999069, "learning_rate": 3.141942937792725e-05, "loss": 6.9739, "step": 20250 }, { "epoch": 0.03934426229508197, "grad_norm": 0.06863825768232346, "learning_rate": 3.139443559674551e-05, "loss": 6.9749, "step": 20260 }, { "epoch": 0.03959646910466583, "grad_norm": 0.09833567589521408, "learning_rate": 3.136944084555397e-05, "loss": 6.9854, "step": 20270 }, { "epoch": 0.03984867591424968, "grad_norm": 0.0912712886929512, "learning_rate": 3.134444514173971e-05, "loss": 6.9956, "step": 20280 }, { "epoch": 0.040100882723833546, "grad_norm": 0.0822669044137001, "learning_rate": 3.131944850269048e-05, "loss": 6.9901, "step": 20290 }, { "epoch": 0.0403530895334174, "grad_norm": 0.08045352250337601, "learning_rate": 3.129445094579466e-05, "loss": 6.9904, "step": 20300 }, { "epoch": 0.04060529634300126, "grad_norm": 0.08694518357515335, "learning_rate": 3.1269452488441256e-05, "loss": 6.9869, "step": 20310 }, { "epoch": 0.04085750315258512, "grad_norm": 0.0818982720375061, "learning_rate": 3.124445314801994e-05, "loss": 6.9912, "step": 20320 }, { "epoch": 0.04110970996216898, "grad_norm": 0.0893910601735115, "learning_rate": 3.1219452941920977e-05, "loss": 6.9943, "step": 20330 }, { "epoch": 0.04136191677175284, "grad_norm": 0.08518873900175095, "learning_rate": 3.1194451887535224e-05, "loss": 6.9928, "step": 20340 }, { "epoch": 0.0416141235813367, "grad_norm": 0.08268500119447708, "learning_rate": 3.116945000225414e-05, "loss": 6.9918, "step": 20350 }, { "epoch": 0.04186633039092055, "grad_norm": 0.09325844049453735, "learning_rate": 3.114444730346976e-05, "loss": 6.9971, "step": 20360 }, { "epoch": 0.042118537200504416, "grad_norm": 0.08846545219421387, "learning_rate": 3.11194438085747e-05, "loss": 6.9947, "step": 20370 }, { "epoch": 0.04237074401008827, "grad_norm": 0.08122814446687698, "learning_rate": 3.1094439534962094e-05, "loss": 6.9931, "step": 20380 }, { "epoch": 0.04262295081967213, "grad_norm": 0.08375494182109833, "learning_rate": 3.106943450002566e-05, "loss": 6.9921, "step": 20390 }, { "epoch": 0.04287515762925599, "grad_norm": 0.08015108108520508, "learning_rate": 3.1044428721159596e-05, "loss": 6.9924, "step": 20400 }, { "epoch": 0.04312736443883985, "grad_norm": 0.08428292721509933, "learning_rate": 3.101942221575867e-05, "loss": 6.9909, "step": 20410 }, { "epoch": 0.04337957124842371, "grad_norm": 0.0811268612742424, "learning_rate": 3.099441500121814e-05, "loss": 6.985, "step": 20420 }, { "epoch": 0.04363177805800757, "grad_norm": 0.07077985256910324, "learning_rate": 3.096940709493372e-05, "loss": 6.9867, "step": 20430 }, { "epoch": 0.04388398486759142, "grad_norm": 0.07824340462684631, "learning_rate": 3.0944398514301644e-05, "loss": 6.9827, "step": 20440 }, { "epoch": 0.044136191677175286, "grad_norm": 0.07319098711013794, "learning_rate": 3.091938927671862e-05, "loss": 6.9829, "step": 20450 }, { "epoch": 0.04438839848675914, "grad_norm": 0.08710586279630661, "learning_rate": 3.089437939958177e-05, "loss": 6.9919, "step": 20460 }, { "epoch": 0.044640605296343, "grad_norm": 0.08120659738779068, "learning_rate": 3.086936890028872e-05, "loss": 6.9899, "step": 20470 }, { "epoch": 0.04489281210592686, "grad_norm": 0.07913222163915634, "learning_rate": 3.084435779623748e-05, "loss": 6.9884, "step": 20480 }, { "epoch": 0.04514501891551072, "grad_norm": 0.08541205525398254, "learning_rate": 3.08193461048265e-05, "loss": 6.9928, "step": 20490 }, { "epoch": 0.04539722572509458, "grad_norm": 0.08822482079267502, "learning_rate": 3.0794333843454634e-05, "loss": 6.9964, "step": 20500 }, { "epoch": 0.04564943253467844, "grad_norm": 0.08614523708820343, "learning_rate": 3.0769321029521146e-05, "loss": 6.9959, "step": 20510 }, { "epoch": 0.04590163934426229, "grad_norm": 0.08899133652448654, "learning_rate": 3.074430768042567e-05, "loss": 6.9933, "step": 20520 }, { "epoch": 0.046153846153846156, "grad_norm": 0.08730828762054443, "learning_rate": 3.0719293813568204e-05, "loss": 6.9956, "step": 20530 }, { "epoch": 0.04640605296343001, "grad_norm": 0.09099544584751129, "learning_rate": 3.069427944634913e-05, "loss": 6.9989, "step": 20540 }, { "epoch": 0.04665825977301387, "grad_norm": 0.07481458783149719, "learning_rate": 3.066926459616918e-05, "loss": 6.9845, "step": 20550 }, { "epoch": 0.04691046658259773, "grad_norm": 0.059941090643405914, "learning_rate": 3.064424928042938e-05, "loss": 6.9758, "step": 20560 }, { "epoch": 0.04716267339218159, "grad_norm": 0.06427902728319168, "learning_rate": 3.061923351653112e-05, "loss": 6.9786, "step": 20570 }, { "epoch": 0.04741488020176545, "grad_norm": 0.07448142021894455, "learning_rate": 3.05942173218761e-05, "loss": 6.9787, "step": 20580 }, { "epoch": 0.04766708701134931, "grad_norm": 0.07918905466794968, "learning_rate": 3.056920071386629e-05, "loss": 6.987, "step": 20590 }, { "epoch": 0.04791929382093316, "grad_norm": 0.09048294275999069, "learning_rate": 3.0544183709903976e-05, "loss": 6.9928, "step": 20600 }, { "epoch": 0.048171500630517027, "grad_norm": 0.08815166354179382, "learning_rate": 3.051916632739172e-05, "loss": 6.9961, "step": 20610 }, { "epoch": 0.04842370744010088, "grad_norm": 0.08422872424125671, "learning_rate": 3.0494148583732316e-05, "loss": 6.9952, "step": 20620 }, { "epoch": 0.04867591424968474, "grad_norm": 0.07449697703123093, "learning_rate": 3.0469130496328864e-05, "loss": 6.9877, "step": 20630 }, { "epoch": 0.0489281210592686, "grad_norm": 0.08626198023557663, "learning_rate": 3.044411208258464e-05, "loss": 6.9843, "step": 20640 }, { "epoch": 0.04918032786885246, "grad_norm": 0.0825851559638977, "learning_rate": 3.0419093359903197e-05, "loss": 6.9832, "step": 20650 }, { "epoch": 0.04943253467843632, "grad_norm": 0.07943921536207199, "learning_rate": 3.0394074345688264e-05, "loss": 6.987, "step": 20660 }, { "epoch": 0.04968474148802018, "grad_norm": 0.05943804979324341, "learning_rate": 3.036905505734381e-05, "loss": 6.9779, "step": 20670 }, { "epoch": 0.049936948297604034, "grad_norm": 0.06349384039640427, "learning_rate": 3.034403551227397e-05, "loss": 6.9692, "step": 20680 }, { "epoch": 0.0501891551071879, "grad_norm": 0.07214104384183884, "learning_rate": 3.0319015727883055e-05, "loss": 6.9697, "step": 20690 }, { "epoch": 0.05044136191677175, "grad_norm": 0.07687079906463623, "learning_rate": 3.029399572157556e-05, "loss": 6.9833, "step": 20700 }, { "epoch": 0.05069356872635561, "grad_norm": 0.08245659619569778, "learning_rate": 3.0268975510756133e-05, "loss": 6.9906, "step": 20710 }, { "epoch": 0.05094577553593947, "grad_norm": 0.09125787019729614, "learning_rate": 3.0243955112829546e-05, "loss": 6.996, "step": 20720 }, { "epoch": 0.05119798234552333, "grad_norm": 0.08896153420209885, "learning_rate": 3.021893454520072e-05, "loss": 6.9943, "step": 20730 }, { "epoch": 0.05145018915510719, "grad_norm": 0.08919835835695267, "learning_rate": 3.0193913825274678e-05, "loss": 6.9919, "step": 20740 }, { "epoch": 0.05170239596469105, "grad_norm": 0.08934593945741653, "learning_rate": 3.0168892970456568e-05, "loss": 6.9926, "step": 20750 }, { "epoch": 0.051954602774274904, "grad_norm": 0.073870450258255, "learning_rate": 3.0143871998151616e-05, "loss": 6.9895, "step": 20760 }, { "epoch": 0.05220680958385877, "grad_norm": 0.08059633523225784, "learning_rate": 3.011885092576513e-05, "loss": 6.9892, "step": 20770 }, { "epoch": 0.05245901639344262, "grad_norm": 0.08520019054412842, "learning_rate": 3.0093829770702505e-05, "loss": 6.9898, "step": 20780 }, { "epoch": 0.05271122320302648, "grad_norm": 0.08255846053361893, "learning_rate": 3.0068808550369172e-05, "loss": 6.9948, "step": 20790 }, { "epoch": 0.05296343001261034, "grad_norm": 0.0714043453335762, "learning_rate": 3.004378728217061e-05, "loss": 6.9827, "step": 20800 }, { "epoch": 0.0532156368221942, "grad_norm": 0.06543158739805222, "learning_rate": 3.0018765983512356e-05, "loss": 6.9795, "step": 20810 }, { "epoch": 0.05346784363177806, "grad_norm": 0.08895495533943176, "learning_rate": 2.999374467179993e-05, "loss": 6.9838, "step": 20820 }, { "epoch": 0.05372005044136192, "grad_norm": 0.09420471638441086, "learning_rate": 2.9968723364438903e-05, "loss": 6.9949, "step": 20830 }, { "epoch": 0.053972257250945774, "grad_norm": 0.0877799466252327, "learning_rate": 2.99437020788348e-05, "loss": 6.9922, "step": 20840 }, { "epoch": 0.05422446406052964, "grad_norm": 0.08419880270957947, "learning_rate": 2.9918680832393173e-05, "loss": 6.993, "step": 20850 }, { "epoch": 0.05447667087011349, "grad_norm": 0.09340495616197586, "learning_rate": 2.989365964251951e-05, "loss": 6.9975, "step": 20860 }, { "epoch": 0.05472887767969735, "grad_norm": 0.08808037638664246, "learning_rate": 2.9868638526619273e-05, "loss": 6.9939, "step": 20870 }, { "epoch": 0.05498108448928121, "grad_norm": 0.07027871161699295, "learning_rate": 2.984361750209789e-05, "loss": 6.9846, "step": 20880 }, { "epoch": 0.05523329129886507, "grad_norm": 0.0692792534828186, "learning_rate": 2.9818596586360697e-05, "loss": 6.9758, "step": 20890 }, { "epoch": 0.05548549810844893, "grad_norm": 0.07505670189857483, "learning_rate": 2.9793575796812978e-05, "loss": 6.9789, "step": 20900 }, { "epoch": 0.05573770491803279, "grad_norm": 0.08192512392997742, "learning_rate": 2.9768555150859904e-05, "loss": 6.983, "step": 20910 }, { "epoch": 0.055989911727616644, "grad_norm": 0.08867453783750534, "learning_rate": 2.9743534665906577e-05, "loss": 6.9956, "step": 20920 }, { "epoch": 0.05624211853720051, "grad_norm": 0.09147784113883972, "learning_rate": 2.9718514359357955e-05, "loss": 6.9955, "step": 20930 }, { "epoch": 0.05649432534678436, "grad_norm": 0.08835006505250931, "learning_rate": 2.9693494248618915e-05, "loss": 6.9941, "step": 20940 }, { "epoch": 0.05674653215636822, "grad_norm": 0.09542607516050339, "learning_rate": 2.9668474351094135e-05, "loss": 6.9943, "step": 20950 }, { "epoch": 0.05699873896595208, "grad_norm": 0.0822034403681755, "learning_rate": 2.964345468418821e-05, "loss": 6.9937, "step": 20960 }, { "epoch": 0.05725094577553594, "grad_norm": 0.07580950856208801, "learning_rate": 2.961843526530553e-05, "loss": 6.9897, "step": 20970 }, { "epoch": 0.0575031525851198, "grad_norm": 0.07783206552267075, "learning_rate": 2.9593416111850332e-05, "loss": 6.9846, "step": 20980 }, { "epoch": 0.05775535939470366, "grad_norm": 0.06197177618741989, "learning_rate": 2.9568397241226664e-05, "loss": 6.9758, "step": 20990 }, { "epoch": 0.058007566204287514, "grad_norm": 0.06147775799036026, "learning_rate": 2.954337867083837e-05, "loss": 6.9698, "step": 21000 }, { "epoch": 0.05825977301387138, "grad_norm": 0.06806230545043945, "learning_rate": 2.951836041808911e-05, "loss": 6.9738, "step": 21010 }, { "epoch": 0.05851197982345523, "grad_norm": 0.07509814947843552, "learning_rate": 2.949334250038228e-05, "loss": 6.9832, "step": 21020 }, { "epoch": 0.05876418663303909, "grad_norm": 0.06963100284337997, "learning_rate": 2.9468324935121087e-05, "loss": 6.9838, "step": 21030 }, { "epoch": 0.05901639344262295, "grad_norm": 0.08101928234100342, "learning_rate": 2.9443307739708464e-05, "loss": 6.9886, "step": 21040 }, { "epoch": 0.05926860025220681, "grad_norm": 0.0801074206829071, "learning_rate": 2.9418290931547108e-05, "loss": 6.9875, "step": 21050 }, { "epoch": 0.05952080706179067, "grad_norm": 0.07951697707176208, "learning_rate": 2.9393274528039415e-05, "loss": 6.9846, "step": 21060 }, { "epoch": 0.05977301387137453, "grad_norm": 0.07328716665506363, "learning_rate": 2.9368258546587552e-05, "loss": 6.9814, "step": 21070 }, { "epoch": 0.060025220680958384, "grad_norm": 0.07294756919145584, "learning_rate": 2.9343243004593322e-05, "loss": 6.9815, "step": 21080 }, { "epoch": 0.06027742749054225, "grad_norm": 0.07678815722465515, "learning_rate": 2.9318227919458293e-05, "loss": 6.9844, "step": 21090 }, { "epoch": 0.0605296343001261, "grad_norm": 0.08764392882585526, "learning_rate": 2.9293213308583676e-05, "loss": 6.9933, "step": 21100 }, { "epoch": 0.06078184110970996, "grad_norm": 0.08896923065185547, "learning_rate": 2.9268199189370332e-05, "loss": 6.9929, "step": 21110 }, { "epoch": 0.06103404791929382, "grad_norm": 0.08528666943311691, "learning_rate": 2.9243185579218843e-05, "loss": 6.9928, "step": 21120 }, { "epoch": 0.06128625472887768, "grad_norm": 0.08646918088197708, "learning_rate": 2.9218172495529376e-05, "loss": 6.9926, "step": 21130 }, { "epoch": 0.06153846153846154, "grad_norm": 0.0894322320818901, "learning_rate": 2.9193159955701774e-05, "loss": 6.9842, "step": 21140 }, { "epoch": 0.0617906683480454, "grad_norm": 0.09580285102128983, "learning_rate": 2.9168147977135468e-05, "loss": 7.0018, "step": 21150 }, { "epoch": 0.062042875157629254, "grad_norm": 0.08541412651538849, "learning_rate": 2.914313657722953e-05, "loss": 7.0003, "step": 21160 }, { "epoch": 0.06229508196721312, "grad_norm": 0.09793522953987122, "learning_rate": 2.9118125773382596e-05, "loss": 7.0034, "step": 21170 }, { "epoch": 0.06254728877679698, "grad_norm": 0.08663418889045715, "learning_rate": 2.909311558299293e-05, "loss": 6.9943, "step": 21180 }, { "epoch": 0.06279949558638083, "grad_norm": 0.09234649688005447, "learning_rate": 2.906810602345831e-05, "loss": 6.9921, "step": 21190 }, { "epoch": 0.06305170239596469, "grad_norm": 0.07890582829713821, "learning_rate": 2.9043097112176153e-05, "loss": 6.9898, "step": 21200 }, { "epoch": 0.06330390920554856, "grad_norm": 0.07312845438718796, "learning_rate": 2.901808886654334e-05, "loss": 6.9893, "step": 21210 }, { "epoch": 0.0635561160151324, "grad_norm": 0.0761176347732544, "learning_rate": 2.8993081303956367e-05, "loss": 6.9844, "step": 21220 }, { "epoch": 0.06380832282471627, "grad_norm": 0.06898761540651321, "learning_rate": 2.8968074441811206e-05, "loss": 6.984, "step": 21230 }, { "epoch": 0.06406052963430013, "grad_norm": 0.07157117873430252, "learning_rate": 2.8943068297503335e-05, "loss": 6.9794, "step": 21240 }, { "epoch": 0.06431273644388398, "grad_norm": 0.07128357142210007, "learning_rate": 2.891806288842778e-05, "loss": 6.9814, "step": 21250 }, { "epoch": 0.06456494325346784, "grad_norm": 0.06366443634033203, "learning_rate": 2.889305823197901e-05, "loss": 6.9732, "step": 21260 }, { "epoch": 0.0648171500630517, "grad_norm": 0.07343954592943192, "learning_rate": 2.8868054345550997e-05, "loss": 6.9775, "step": 21270 }, { "epoch": 0.06506935687263556, "grad_norm": 0.08088327199220657, "learning_rate": 2.8843051246537167e-05, "loss": 6.9871, "step": 21280 }, { "epoch": 0.06532156368221942, "grad_norm": 0.0782504603266716, "learning_rate": 2.88180489523304e-05, "loss": 6.989, "step": 21290 }, { "epoch": 0.06557377049180328, "grad_norm": 0.07517241686582565, "learning_rate": 2.879304748032301e-05, "loss": 6.9868, "step": 21300 }, { "epoch": 0.06582597730138713, "grad_norm": 0.08858902752399445, "learning_rate": 2.8768046847906755e-05, "loss": 6.9882, "step": 21310 }, { "epoch": 0.066078184110971, "grad_norm": 0.09172829985618591, "learning_rate": 2.8743047072472784e-05, "loss": 6.9987, "step": 21320 }, { "epoch": 0.06633039092055486, "grad_norm": 0.09504150599241257, "learning_rate": 2.8718048171411696e-05, "loss": 6.9997, "step": 21330 }, { "epoch": 0.06658259773013872, "grad_norm": 0.08841073513031006, "learning_rate": 2.8693050162113403e-05, "loss": 7.0032, "step": 21340 }, { "epoch": 0.06683480453972257, "grad_norm": 0.08917120099067688, "learning_rate": 2.866805306196729e-05, "loss": 6.9946, "step": 21350 }, { "epoch": 0.06708701134930643, "grad_norm": 0.0771327093243599, "learning_rate": 2.8643056888362053e-05, "loss": 6.9881, "step": 21360 }, { "epoch": 0.0673392181588903, "grad_norm": 0.0754358097910881, "learning_rate": 2.861806165868572e-05, "loss": 6.9787, "step": 21370 }, { "epoch": 0.06759142496847415, "grad_norm": 0.07839083671569824, "learning_rate": 2.859306739032574e-05, "loss": 6.9835, "step": 21380 }, { "epoch": 0.06784363177805801, "grad_norm": 0.07817549258470535, "learning_rate": 2.8568074100668826e-05, "loss": 6.9866, "step": 21390 }, { "epoch": 0.06809583858764187, "grad_norm": 0.07231250405311584, "learning_rate": 2.8543081807101045e-05, "loss": 6.9853, "step": 21400 }, { "epoch": 0.06834804539722572, "grad_norm": 0.06513158231973648, "learning_rate": 2.8518090527007752e-05, "loss": 6.9728, "step": 21410 }, { "epoch": 0.06860025220680958, "grad_norm": 0.06650856137275696, "learning_rate": 2.8493100277773616e-05, "loss": 6.9729, "step": 21420 }, { "epoch": 0.06885245901639345, "grad_norm": 0.072236567735672, "learning_rate": 2.8468111076782564e-05, "loss": 6.9748, "step": 21430 }, { "epoch": 0.0691046658259773, "grad_norm": 0.07223080843687057, "learning_rate": 2.8443122941417824e-05, "loss": 6.9776, "step": 21440 }, { "epoch": 0.06935687263556116, "grad_norm": 0.0679786428809166, "learning_rate": 2.8418135889061845e-05, "loss": 6.9768, "step": 21450 }, { "epoch": 0.06960907944514502, "grad_norm": 0.08073627203702927, "learning_rate": 2.8393149937096377e-05, "loss": 6.9848, "step": 21460 }, { "epoch": 0.06986128625472887, "grad_norm": 0.08544517308473587, "learning_rate": 2.8368165102902345e-05, "loss": 6.9934, "step": 21470 }, { "epoch": 0.07011349306431273, "grad_norm": 0.08279824256896973, "learning_rate": 2.8343181403859917e-05, "loss": 6.9904, "step": 21480 }, { "epoch": 0.0703656998738966, "grad_norm": 0.0766916275024414, "learning_rate": 2.8318198857348515e-05, "loss": 6.9862, "step": 21490 }, { "epoch": 0.07061790668348046, "grad_norm": 0.07347358018159866, "learning_rate": 2.8293217480746673e-05, "loss": 6.9854, "step": 21500 }, { "epoch": 0.07087011349306431, "grad_norm": 0.07847356051206589, "learning_rate": 2.82682372914322e-05, "loss": 6.985, "step": 21510 }, { "epoch": 0.07112232030264817, "grad_norm": 0.0744510218501091, "learning_rate": 2.824325830678202e-05, "loss": 6.9864, "step": 21520 }, { "epoch": 0.07137452711223204, "grad_norm": 0.07974319905042648, "learning_rate": 2.8218280544172254e-05, "loss": 6.9863, "step": 21530 }, { "epoch": 0.07162673392181589, "grad_norm": 0.08690357953310013, "learning_rate": 2.8193304020978134e-05, "loss": 6.9896, "step": 21540 }, { "epoch": 0.07187894073139975, "grad_norm": 0.08430934697389603, "learning_rate": 2.8168328754574072e-05, "loss": 6.9901, "step": 21550 }, { "epoch": 0.07213114754098361, "grad_norm": 0.08114370703697205, "learning_rate": 2.8143354762333576e-05, "loss": 6.988, "step": 21560 }, { "epoch": 0.07238335435056746, "grad_norm": 0.08860767632722855, "learning_rate": 2.8118382061629286e-05, "loss": 6.9878, "step": 21570 }, { "epoch": 0.07263556116015132, "grad_norm": 0.08037415146827698, "learning_rate": 2.8093410669832918e-05, "loss": 6.9875, "step": 21580 }, { "epoch": 0.07288776796973519, "grad_norm": 0.07638972252607346, "learning_rate": 2.806844060431533e-05, "loss": 6.9855, "step": 21590 }, { "epoch": 0.07313997477931904, "grad_norm": 0.07577960938215256, "learning_rate": 2.8043471882446393e-05, "loss": 6.9846, "step": 21600 }, { "epoch": 0.0733921815889029, "grad_norm": 0.08524307608604431, "learning_rate": 2.8018504521595073e-05, "loss": 6.992, "step": 21610 }, { "epoch": 0.07364438839848676, "grad_norm": 0.08077868819236755, "learning_rate": 2.799353853912942e-05, "loss": 6.9932, "step": 21620 }, { "epoch": 0.07389659520807061, "grad_norm": 0.0898771584033966, "learning_rate": 2.7968573952416448e-05, "loss": 6.9933, "step": 21630 }, { "epoch": 0.07414880201765447, "grad_norm": 0.05760233849287033, "learning_rate": 2.794361077882229e-05, "loss": 6.9851, "step": 21640 }, { "epoch": 0.07440100882723834, "grad_norm": 0.06005936488509178, "learning_rate": 2.7918649035712022e-05, "loss": 6.9669, "step": 21650 }, { "epoch": 0.0746532156368222, "grad_norm": 0.06383848935365677, "learning_rate": 2.789368874044978e-05, "loss": 6.9709, "step": 21660 }, { "epoch": 0.07490542244640605, "grad_norm": 0.08015543967485428, "learning_rate": 2.7868729910398644e-05, "loss": 6.9821, "step": 21670 }, { "epoch": 0.07515762925598991, "grad_norm": 0.07768207788467407, "learning_rate": 2.7843772562920722e-05, "loss": 6.9882, "step": 21680 }, { "epoch": 0.07540983606557378, "grad_norm": 0.07080978155136108, "learning_rate": 2.781881671537705e-05, "loss": 6.9806, "step": 21690 }, { "epoch": 0.07566204287515763, "grad_norm": 0.0752795934677124, "learning_rate": 2.7793862385127647e-05, "loss": 6.9838, "step": 21700 }, { "epoch": 0.07591424968474149, "grad_norm": 0.06197306513786316, "learning_rate": 2.7768909589531468e-05, "loss": 6.978, "step": 21710 }, { "epoch": 0.07616645649432535, "grad_norm": 0.06996246427297592, "learning_rate": 2.7743958345946395e-05, "loss": 6.9769, "step": 21720 }, { "epoch": 0.0764186633039092, "grad_norm": 0.06347585469484329, "learning_rate": 2.7719008671729242e-05, "loss": 6.9786, "step": 21730 }, { "epoch": 0.07667087011349306, "grad_norm": 0.0705699622631073, "learning_rate": 2.7694060584235714e-05, "loss": 6.9794, "step": 21740 }, { "epoch": 0.07692307692307693, "grad_norm": 0.07982660084962845, "learning_rate": 2.7669114100820445e-05, "loss": 6.987, "step": 21750 }, { "epoch": 0.07717528373266078, "grad_norm": 0.08747733384370804, "learning_rate": 2.76441692388369e-05, "loss": 6.9912, "step": 21760 }, { "epoch": 0.07742749054224464, "grad_norm": 0.08737927675247192, "learning_rate": 2.761922601563748e-05, "loss": 6.9935, "step": 21770 }, { "epoch": 0.0776796973518285, "grad_norm": 0.0895504429936409, "learning_rate": 2.7594284448573396e-05, "loss": 6.9936, "step": 21780 }, { "epoch": 0.07793190416141235, "grad_norm": 0.08755683153867722, "learning_rate": 2.7569344554994728e-05, "loss": 6.9925, "step": 21790 }, { "epoch": 0.07818411097099622, "grad_norm": 0.08771742135286331, "learning_rate": 2.754440635225038e-05, "loss": 6.9938, "step": 21800 }, { "epoch": 0.07843631778058008, "grad_norm": 0.09770841151475906, "learning_rate": 2.7519469857688105e-05, "loss": 6.9995, "step": 21810 }, { "epoch": 0.07868852459016394, "grad_norm": 0.09476637840270996, "learning_rate": 2.7494535088654434e-05, "loss": 7.0013, "step": 21820 }, { "epoch": 0.07894073139974779, "grad_norm": 0.08871057629585266, "learning_rate": 2.746960206249473e-05, "loss": 6.9952, "step": 21830 }, { "epoch": 0.07919293820933165, "grad_norm": 0.08482552319765091, "learning_rate": 2.744467079655312e-05, "loss": 6.9911, "step": 21840 }, { "epoch": 0.07944514501891552, "grad_norm": 0.08749604970216751, "learning_rate": 2.7419741308172507e-05, "loss": 6.9932, "step": 21850 }, { "epoch": 0.07969735182849937, "grad_norm": 0.07931304723024368, "learning_rate": 2.7394813614694578e-05, "loss": 6.9873, "step": 21860 }, { "epoch": 0.07994955863808323, "grad_norm": 0.0722198411822319, "learning_rate": 2.736988773345975e-05, "loss": 6.9887, "step": 21870 }, { "epoch": 0.08020176544766709, "grad_norm": 0.07165587693452835, "learning_rate": 2.7344963681807196e-05, "loss": 6.9822, "step": 21880 }, { "epoch": 0.08045397225725094, "grad_norm": 0.07557129114866257, "learning_rate": 2.7320041477074792e-05, "loss": 6.9851, "step": 21890 }, { "epoch": 0.0807061790668348, "grad_norm": 0.08370423316955566, "learning_rate": 2.729512113659917e-05, "loss": 6.9874, "step": 21900 }, { "epoch": 0.08095838587641867, "grad_norm": 0.078025221824646, "learning_rate": 2.7270202677715622e-05, "loss": 6.9855, "step": 21910 }, { "epoch": 0.08121059268600252, "grad_norm": 0.05975275859236717, "learning_rate": 2.724528611775816e-05, "loss": 6.98, "step": 21920 }, { "epoch": 0.08146279949558638, "grad_norm": 0.051544494926929474, "learning_rate": 2.7220371474059457e-05, "loss": 6.9702, "step": 21930 }, { "epoch": 0.08171500630517024, "grad_norm": 0.06059694662690163, "learning_rate": 2.719545876395087e-05, "loss": 6.973, "step": 21940 }, { "epoch": 0.08196721311475409, "grad_norm": 0.0654018446803093, "learning_rate": 2.7170548004762383e-05, "loss": 6.9752, "step": 21950 }, { "epoch": 0.08221941992433796, "grad_norm": 0.06079825386404991, "learning_rate": 2.714563921382267e-05, "loss": 6.9757, "step": 21960 }, { "epoch": 0.08247162673392182, "grad_norm": 0.07836627215147018, "learning_rate": 2.7120732408458986e-05, "loss": 6.9878, "step": 21970 }, { "epoch": 0.08272383354350568, "grad_norm": 0.09112227708101273, "learning_rate": 2.7095827605997226e-05, "loss": 6.9932, "step": 21980 }, { "epoch": 0.08297604035308953, "grad_norm": 0.08282512426376343, "learning_rate": 2.7070924823761907e-05, "loss": 6.993, "step": 21990 }, { "epoch": 0.0832282471626734, "grad_norm": 0.08134185522794724, "learning_rate": 2.7046024079076106e-05, "loss": 6.9921, "step": 22000 }, { "epoch": 0.08348045397225726, "grad_norm": 0.08529815077781677, "learning_rate": 2.7021125389261522e-05, "loss": 6.9923, "step": 22010 }, { "epoch": 0.0837326607818411, "grad_norm": 0.08679496496915817, "learning_rate": 2.6996228771638383e-05, "loss": 6.9929, "step": 22020 }, { "epoch": 0.08398486759142497, "grad_norm": 0.08404955267906189, "learning_rate": 2.6971334243525527e-05, "loss": 6.9934, "step": 22030 }, { "epoch": 0.08423707440100883, "grad_norm": 0.07682210952043533, "learning_rate": 2.6946441822240275e-05, "loss": 6.9876, "step": 22040 }, { "epoch": 0.08448928121059268, "grad_norm": 0.07457254081964493, "learning_rate": 2.6921551525098542e-05, "loss": 6.9809, "step": 22050 }, { "epoch": 0.08474148802017654, "grad_norm": 0.07452739030122757, "learning_rate": 2.6896663369414726e-05, "loss": 6.9853, "step": 22060 }, { "epoch": 0.08499369482976041, "grad_norm": 0.07545879483222961, "learning_rate": 2.687177737250176e-05, "loss": 6.986, "step": 22070 }, { "epoch": 0.08524590163934426, "grad_norm": 0.07809851318597794, "learning_rate": 2.6846893551671057e-05, "loss": 6.9843, "step": 22080 }, { "epoch": 0.08549810844892812, "grad_norm": 0.07783644646406174, "learning_rate": 2.6822011924232514e-05, "loss": 6.9858, "step": 22090 }, { "epoch": 0.08575031525851198, "grad_norm": 0.08201733976602554, "learning_rate": 2.6797132507494534e-05, "loss": 6.9882, "step": 22100 }, { "epoch": 0.08600252206809583, "grad_norm": 0.07154092937707901, "learning_rate": 2.677225531876394e-05, "loss": 6.9856, "step": 22110 }, { "epoch": 0.0862547288776797, "grad_norm": 0.073533795773983, "learning_rate": 2.674738037534604e-05, "loss": 6.9837, "step": 22120 }, { "epoch": 0.08650693568726356, "grad_norm": 0.08164063096046448, "learning_rate": 2.6722507694544546e-05, "loss": 6.9866, "step": 22130 }, { "epoch": 0.08675914249684742, "grad_norm": 0.07810612767934799, "learning_rate": 2.6697637293661638e-05, "loss": 6.9883, "step": 22140 }, { "epoch": 0.08701134930643127, "grad_norm": 0.08244123309850693, "learning_rate": 2.6672769189997864e-05, "loss": 6.9909, "step": 22150 }, { "epoch": 0.08726355611601513, "grad_norm": 0.07919011265039444, "learning_rate": 2.6647903400852224e-05, "loss": 6.9872, "step": 22160 }, { "epoch": 0.087515762925599, "grad_norm": 0.059914521872997284, "learning_rate": 2.662303994352205e-05, "loss": 6.9842, "step": 22170 }, { "epoch": 0.08776796973518285, "grad_norm": 0.06848547607660294, "learning_rate": 2.6598178835303112e-05, "loss": 6.9737, "step": 22180 }, { "epoch": 0.08802017654476671, "grad_norm": 0.0740695670247078, "learning_rate": 2.65733200934895e-05, "loss": 6.98, "step": 22190 }, { "epoch": 0.08827238335435057, "grad_norm": 0.07762891799211502, "learning_rate": 2.6548463735373683e-05, "loss": 6.9833, "step": 22200 }, { "epoch": 0.08852459016393442, "grad_norm": 0.07520192116498947, "learning_rate": 2.6523609778246458e-05, "loss": 6.985, "step": 22210 }, { "epoch": 0.08877679697351828, "grad_norm": 0.07908309251070023, "learning_rate": 2.6498758239396955e-05, "loss": 6.9876, "step": 22220 }, { "epoch": 0.08902900378310215, "grad_norm": 0.06367400288581848, "learning_rate": 2.6473909136112637e-05, "loss": 6.9817, "step": 22230 }, { "epoch": 0.089281210592686, "grad_norm": 0.06269841641187668, "learning_rate": 2.644906248567924e-05, "loss": 6.976, "step": 22240 }, { "epoch": 0.08953341740226986, "grad_norm": 0.05896258354187012, "learning_rate": 2.6424218305380835e-05, "loss": 6.9711, "step": 22250 }, { "epoch": 0.08978562421185372, "grad_norm": 0.06837170571088791, "learning_rate": 2.6399376612499737e-05, "loss": 6.9811, "step": 22260 }, { "epoch": 0.09003783102143757, "grad_norm": 0.07625794410705566, "learning_rate": 2.6374537424316558e-05, "loss": 6.9884, "step": 22270 }, { "epoch": 0.09029003783102144, "grad_norm": 0.07754842191934586, "learning_rate": 2.634970075811014e-05, "loss": 6.9886, "step": 22280 }, { "epoch": 0.0905422446406053, "grad_norm": 0.0895523950457573, "learning_rate": 2.632486663115762e-05, "loss": 6.9912, "step": 22290 }, { "epoch": 0.09079445145018916, "grad_norm": 0.07245009392499924, "learning_rate": 2.6300035060734293e-05, "loss": 6.9864, "step": 22300 }, { "epoch": 0.09104665825977301, "grad_norm": 0.07598204165697098, "learning_rate": 2.6275206064113762e-05, "loss": 6.9815, "step": 22310 }, { "epoch": 0.09129886506935687, "grad_norm": 0.08229893445968628, "learning_rate": 2.6250379658567762e-05, "loss": 6.9854, "step": 22320 }, { "epoch": 0.09155107187894074, "grad_norm": 0.0771491602063179, "learning_rate": 2.6225555861366278e-05, "loss": 6.9877, "step": 22330 }, { "epoch": 0.09180327868852459, "grad_norm": 0.065254807472229, "learning_rate": 2.620073468977746e-05, "loss": 6.9825, "step": 22340 }, { "epoch": 0.09205548549810845, "grad_norm": 0.07108163088560104, "learning_rate": 2.617591616106763e-05, "loss": 6.98, "step": 22350 }, { "epoch": 0.09230769230769231, "grad_norm": 0.07295969128608704, "learning_rate": 2.6151100292501277e-05, "loss": 6.9821, "step": 22360 }, { "epoch": 0.09255989911727616, "grad_norm": 0.06976266950368881, "learning_rate": 2.6126287101341036e-05, "loss": 6.9821, "step": 22370 }, { "epoch": 0.09281210592686003, "grad_norm": 0.07716602087020874, "learning_rate": 2.6101476604847686e-05, "loss": 6.9817, "step": 22380 }, { "epoch": 0.09306431273644389, "grad_norm": 0.08631538599729538, "learning_rate": 2.6076668820280116e-05, "loss": 6.9845, "step": 22390 }, { "epoch": 0.09331651954602774, "grad_norm": 0.08507665991783142, "learning_rate": 2.6051863764895355e-05, "loss": 7.0001, "step": 22400 }, { "epoch": 0.00025220680958385876, "grad_norm": 0.07350008934736252, "learning_rate": 2.6027061455948498e-05, "loss": 6.99, "step": 22410 }, { "epoch": 0.0005044136191677175, "grad_norm": 0.06693128496408463, "learning_rate": 2.6002261910692782e-05, "loss": 6.9801, "step": 22420 }, { "epoch": 0.0007566204287515763, "grad_norm": 0.08638565987348557, "learning_rate": 2.597746514637945e-05, "loss": 6.9775, "step": 22430 }, { "epoch": 0.001008827238335435, "grad_norm": 0.07821646332740784, "learning_rate": 2.595267118025788e-05, "loss": 6.9869, "step": 22440 }, { "epoch": 0.0012610340479192938, "grad_norm": 0.08344040811061859, "learning_rate": 2.5927880029575468e-05, "loss": 6.9816, "step": 22450 }, { "epoch": 0.0015132408575031526, "grad_norm": 0.08186786621809006, "learning_rate": 2.5903091711577632e-05, "loss": 6.9868, "step": 22460 }, { "epoch": 0.0017654476670870113, "grad_norm": 0.07555871456861496, "learning_rate": 2.5878306243507873e-05, "loss": 6.9853, "step": 22470 }, { "epoch": 0.00201765447667087, "grad_norm": 0.07758211344480515, "learning_rate": 2.5853523642607663e-05, "loss": 6.9849, "step": 22480 }, { "epoch": 0.002269861286254729, "grad_norm": 0.0837162658572197, "learning_rate": 2.582874392611651e-05, "loss": 6.985, "step": 22490 }, { "epoch": 0.0025220680958385876, "grad_norm": 0.0853802040219307, "learning_rate": 2.5803967111271888e-05, "loss": 6.9958, "step": 22500 }, { "epoch": 0.0027742749054224464, "grad_norm": 0.07989446073770523, "learning_rate": 2.577919321530928e-05, "loss": 6.9908, "step": 22510 }, { "epoch": 0.003026481715006305, "grad_norm": 0.08117475360631943, "learning_rate": 2.5754422255462117e-05, "loss": 6.9882, "step": 22520 }, { "epoch": 0.003278688524590164, "grad_norm": 0.07377458363771439, "learning_rate": 2.5729654248961804e-05, "loss": 6.9825, "step": 22530 }, { "epoch": 0.0035308953341740227, "grad_norm": 0.0631859228014946, "learning_rate": 2.5704889213037668e-05, "loss": 6.98, "step": 22540 }, { "epoch": 0.0037831021437578815, "grad_norm": 0.0505068339407444, "learning_rate": 2.5680127164917012e-05, "loss": 6.985, "step": 22550 }, { "epoch": 0.00403530895334174, "grad_norm": 0.04554084315896034, "learning_rate": 2.5655368121825002e-05, "loss": 6.9576, "step": 22560 }, { "epoch": 0.004287515762925599, "grad_norm": 0.04630338400602341, "learning_rate": 2.5630612100984773e-05, "loss": 6.9576, "step": 22570 }, { "epoch": 0.004539722572509458, "grad_norm": 0.044307317584753036, "learning_rate": 2.5605859119617325e-05, "loss": 6.9556, "step": 22580 }, { "epoch": 0.0047919293820933165, "grad_norm": 0.04533021152019501, "learning_rate": 2.5581109194941522e-05, "loss": 6.9533, "step": 22590 }, { "epoch": 0.005044136191677175, "grad_norm": 0.08081978559494019, "learning_rate": 2.5556362344174166e-05, "loss": 6.9796, "step": 22600 }, { "epoch": 0.005296343001261034, "grad_norm": 0.08091333508491516, "learning_rate": 2.5531618584529858e-05, "loss": 6.9925, "step": 22610 }, { "epoch": 0.005548549810844893, "grad_norm": 0.08027852326631546, "learning_rate": 2.550687793322109e-05, "loss": 6.9893, "step": 22620 }, { "epoch": 0.005800756620428752, "grad_norm": 0.07459098100662231, "learning_rate": 2.5482140407458156e-05, "loss": 6.9869, "step": 22630 }, { "epoch": 0.00605296343001261, "grad_norm": 0.0660347044467926, "learning_rate": 2.5457406024449217e-05, "loss": 6.9824, "step": 22640 }, { "epoch": 0.006305170239596469, "grad_norm": 0.07348542660474777, "learning_rate": 2.543267480140021e-05, "loss": 6.9812, "step": 22650 }, { "epoch": 0.006557377049180328, "grad_norm": 0.0770292803645134, "learning_rate": 2.54079467555149e-05, "loss": 6.9833, "step": 22660 }, { "epoch": 0.006809583858764187, "grad_norm": 0.06195181980729103, "learning_rate": 2.5383221903994814e-05, "loss": 6.9826, "step": 22670 }, { "epoch": 0.007061790668348045, "grad_norm": 0.06269080191850662, "learning_rate": 2.5358500264039314e-05, "loss": 6.9741, "step": 22680 }, { "epoch": 0.007313997477931904, "grad_norm": 0.08346486836671829, "learning_rate": 2.5333781852845442e-05, "loss": 6.9833, "step": 22690 }, { "epoch": 0.007566204287515763, "grad_norm": 0.07426036149263382, "learning_rate": 2.5309066687608074e-05, "loss": 6.9889, "step": 22700 }, { "epoch": 0.007818411097099623, "grad_norm": 0.08068997412919998, "learning_rate": 2.528435478551979e-05, "loss": 6.9831, "step": 22710 }, { "epoch": 0.00807061790668348, "grad_norm": 0.07198501378297806, "learning_rate": 2.525964616377088e-05, "loss": 6.9839, "step": 22720 }, { "epoch": 0.00832282471626734, "grad_norm": 0.08008075505495071, "learning_rate": 2.523494083954941e-05, "loss": 6.9828, "step": 22730 }, { "epoch": 0.008575031525851198, "grad_norm": 0.0799403116106987, "learning_rate": 2.5210238830041092e-05, "loss": 6.9859, "step": 22740 }, { "epoch": 0.008827238335435058, "grad_norm": 0.08474970608949661, "learning_rate": 2.5185540152429384e-05, "loss": 6.9911, "step": 22750 }, { "epoch": 0.009079445145018916, "grad_norm": 0.0827057883143425, "learning_rate": 2.5160844823895377e-05, "loss": 6.9926, "step": 22760 }, { "epoch": 0.009331651954602775, "grad_norm": 0.08405988663434982, "learning_rate": 2.513615286161788e-05, "loss": 6.9901, "step": 22770 }, { "epoch": 0.009583858764186633, "grad_norm": 0.0848427563905716, "learning_rate": 2.5111464282773316e-05, "loss": 6.9901, "step": 22780 }, { "epoch": 0.009836065573770493, "grad_norm": 0.07957688719034195, "learning_rate": 2.5086779104535796e-05, "loss": 6.99, "step": 22790 }, { "epoch": 0.01008827238335435, "grad_norm": 0.07843109965324402, "learning_rate": 2.5062097344077018e-05, "loss": 6.9896, "step": 22800 }, { "epoch": 0.01034047919293821, "grad_norm": 0.052940867841243744, "learning_rate": 2.5037419018566366e-05, "loss": 6.9725, "step": 22810 }, { "epoch": 0.010592686002522068, "grad_norm": 0.0484757125377655, "learning_rate": 2.5012744145170767e-05, "loss": 6.9634, "step": 22820 }, { "epoch": 0.010844892812105928, "grad_norm": 0.047655247151851654, "learning_rate": 2.4988072741054782e-05, "loss": 6.9642, "step": 22830 }, { "epoch": 0.011097099621689786, "grad_norm": 0.0467740073800087, "learning_rate": 2.4963404823380575e-05, "loss": 6.9585, "step": 22840 }, { "epoch": 0.011349306431273645, "grad_norm": 0.06959223002195358, "learning_rate": 2.4938740409307836e-05, "loss": 6.9694, "step": 22850 }, { "epoch": 0.011601513240857503, "grad_norm": 0.07930698990821838, "learning_rate": 2.4914079515993866e-05, "loss": 6.9887, "step": 22860 }, { "epoch": 0.011853720050441363, "grad_norm": 0.08748893439769745, "learning_rate": 2.4889422160593485e-05, "loss": 6.9945, "step": 22870 }, { "epoch": 0.01210592686002522, "grad_norm": 0.07690118998289108, "learning_rate": 2.486476836025908e-05, "loss": 6.986, "step": 22880 }, { "epoch": 0.01235813366960908, "grad_norm": 0.07000916451215744, "learning_rate": 2.4840118132140527e-05, "loss": 6.9815, "step": 22890 }, { "epoch": 0.012610340479192938, "grad_norm": 0.07068539410829544, "learning_rate": 2.4815471493385254e-05, "loss": 6.9813, "step": 22900 }, { "epoch": 0.012862547288776798, "grad_norm": 0.07060578465461731, "learning_rate": 2.4790828461138167e-05, "loss": 6.9802, "step": 22910 }, { "epoch": 0.013114754098360656, "grad_norm": 0.07502042502164841, "learning_rate": 2.4766189052541684e-05, "loss": 6.9843, "step": 22920 }, { "epoch": 0.013366960907944515, "grad_norm": 0.07693812251091003, "learning_rate": 2.4741553284735673e-05, "loss": 6.9827, "step": 22930 }, { "epoch": 0.013619167717528373, "grad_norm": 0.07420184463262558, "learning_rate": 2.4716921174857515e-05, "loss": 6.9877, "step": 22940 }, { "epoch": 0.013871374527112233, "grad_norm": 0.07335975766181946, "learning_rate": 2.4692292740042e-05, "loss": 6.9843, "step": 22950 }, { "epoch": 0.01412358133669609, "grad_norm": 0.0680353119969368, "learning_rate": 2.466766799742137e-05, "loss": 6.9796, "step": 22960 }, { "epoch": 0.01437578814627995, "grad_norm": 0.07263767719268799, "learning_rate": 2.4643046964125346e-05, "loss": 6.9807, "step": 22970 }, { "epoch": 0.014627994955863808, "grad_norm": 0.06038213148713112, "learning_rate": 2.4618429657280986e-05, "loss": 6.9795, "step": 22980 }, { "epoch": 0.014880201765447668, "grad_norm": 0.07456285506486893, "learning_rate": 2.4593816094012836e-05, "loss": 6.9768, "step": 22990 }, { "epoch": 0.015132408575031526, "grad_norm": 0.07010521739721298, "learning_rate": 2.4569206291442795e-05, "loss": 6.9876, "step": 23000 }, { "epoch": 0.015384615384615385, "grad_norm": 0.055996689945459366, "learning_rate": 2.454460026669015e-05, "loss": 6.9723, "step": 23010 }, { "epoch": 0.015636822194199245, "grad_norm": 0.058094676584005356, "learning_rate": 2.451999803687156e-05, "loss": 6.9693, "step": 23020 }, { "epoch": 0.0158890290037831, "grad_norm": 0.08054610341787338, "learning_rate": 2.4495399619101063e-05, "loss": 6.9756, "step": 23030 }, { "epoch": 0.01614123581336696, "grad_norm": 0.08501625806093216, "learning_rate": 2.4470805030490018e-05, "loss": 6.9881, "step": 23040 }, { "epoch": 0.01639344262295082, "grad_norm": 0.07791858166456223, "learning_rate": 2.444621428814714e-05, "loss": 6.9872, "step": 23050 }, { "epoch": 0.01664564943253468, "grad_norm": 0.08236780762672424, "learning_rate": 2.442162740917845e-05, "loss": 6.9881, "step": 23060 }, { "epoch": 0.016897856242118536, "grad_norm": 0.0800807997584343, "learning_rate": 2.4397044410687324e-05, "loss": 6.988, "step": 23070 }, { "epoch": 0.017150063051702396, "grad_norm": 0.08249061554670334, "learning_rate": 2.4372465309774374e-05, "loss": 6.9882, "step": 23080 }, { "epoch": 0.017402269861286256, "grad_norm": 0.07825937867164612, "learning_rate": 2.434789012353754e-05, "loss": 6.9874, "step": 23090 }, { "epoch": 0.017654476670870115, "grad_norm": 0.0742950439453125, "learning_rate": 2.4323318869072054e-05, "loss": 6.9853, "step": 23100 }, { "epoch": 0.01790668348045397, "grad_norm": 0.07181450724601746, "learning_rate": 2.429875156347036e-05, "loss": 6.9838, "step": 23110 }, { "epoch": 0.01815889029003783, "grad_norm": 0.07871191948652267, "learning_rate": 2.427418822382222e-05, "loss": 6.9877, "step": 23120 }, { "epoch": 0.01841109709962169, "grad_norm": 0.08504471182823181, "learning_rate": 2.4249628867214587e-05, "loss": 6.9884, "step": 23130 }, { "epoch": 0.01866330390920555, "grad_norm": 0.08183567970991135, "learning_rate": 2.4225073510731667e-05, "loss": 6.9905, "step": 23140 }, { "epoch": 0.018915510718789406, "grad_norm": 0.0834234282374382, "learning_rate": 2.4200522171454868e-05, "loss": 6.9918, "step": 23150 }, { "epoch": 0.019167717528373266, "grad_norm": 0.08870960026979446, "learning_rate": 2.4175974866462825e-05, "loss": 6.9936, "step": 23160 }, { "epoch": 0.019419924337957126, "grad_norm": 0.08249490708112717, "learning_rate": 2.4151431612831345e-05, "loss": 6.9975, "step": 23170 }, { "epoch": 0.019672131147540985, "grad_norm": 0.07802927494049072, "learning_rate": 2.4126892427633432e-05, "loss": 6.9938, "step": 23180 }, { "epoch": 0.01992433795712484, "grad_norm": 0.06958996504545212, "learning_rate": 2.4102357327939255e-05, "loss": 6.9893, "step": 23190 }, { "epoch": 0.0201765447667087, "grad_norm": 0.06585121899843216, "learning_rate": 2.4077826330816127e-05, "loss": 6.9796, "step": 23200 }, { "epoch": 0.02042875157629256, "grad_norm": 0.044001877307891846, "learning_rate": 2.4053299453328536e-05, "loss": 6.9598, "step": 23210 }, { "epoch": 0.02068095838587642, "grad_norm": 0.028678391128778458, "learning_rate": 2.4028776712538068e-05, "loss": 6.9508, "step": 23220 }, { "epoch": 0.020933165195460277, "grad_norm": 0.03750862181186676, "learning_rate": 2.4004258125503488e-05, "loss": 6.9449, "step": 23230 }, { "epoch": 0.021185372005044136, "grad_norm": 0.040621478110551834, "learning_rate": 2.397974370928059e-05, "loss": 6.9523, "step": 23240 }, { "epoch": 0.021437578814627996, "grad_norm": 0.08189934492111206, "learning_rate": 2.395523348092235e-05, "loss": 6.9792, "step": 23250 }, { "epoch": 0.021689785624211855, "grad_norm": 0.080692820250988, "learning_rate": 2.3930727457478773e-05, "loss": 6.9861, "step": 23260 }, { "epoch": 0.02194199243379571, "grad_norm": 0.08128439635038376, "learning_rate": 2.3906225655996965e-05, "loss": 6.9861, "step": 23270 }, { "epoch": 0.02219419924337957, "grad_norm": 0.08126932382583618, "learning_rate": 2.3881728093521083e-05, "loss": 6.9867, "step": 23280 }, { "epoch": 0.02244640605296343, "grad_norm": 0.07697690278291702, "learning_rate": 2.3857234787092348e-05, "loss": 6.9869, "step": 23290 }, { "epoch": 0.02269861286254729, "grad_norm": 0.07158149033784866, "learning_rate": 2.383274575374901e-05, "loss": 6.9866, "step": 23300 }, { "epoch": 0.022950819672131147, "grad_norm": 0.07082059979438782, "learning_rate": 2.380826101052635e-05, "loss": 6.9872, "step": 23310 }, { "epoch": 0.023203026481715006, "grad_norm": 0.0662321150302887, "learning_rate": 2.3783780574456667e-05, "loss": 6.9832, "step": 23320 }, { "epoch": 0.023455233291298866, "grad_norm": 0.06983920931816101, "learning_rate": 2.3759304462569252e-05, "loss": 6.9814, "step": 23330 }, { "epoch": 0.023707440100882726, "grad_norm": 0.06879273056983948, "learning_rate": 2.3734832691890412e-05, "loss": 6.98, "step": 23340 }, { "epoch": 0.02395964691046658, "grad_norm": 0.06657678633928299, "learning_rate": 2.37103652794434e-05, "loss": 6.9778, "step": 23350 }, { "epoch": 0.02421185372005044, "grad_norm": 0.08029713481664658, "learning_rate": 2.3685902242248487e-05, "loss": 6.9814, "step": 23360 }, { "epoch": 0.0244640605296343, "grad_norm": 0.06303472071886063, "learning_rate": 2.3661443597322835e-05, "loss": 6.9807, "step": 23370 }, { "epoch": 0.02471626733921816, "grad_norm": 0.07386327534914017, "learning_rate": 2.363698936168061e-05, "loss": 6.9844, "step": 23380 }, { "epoch": 0.024968474148802017, "grad_norm": 0.0660238265991211, "learning_rate": 2.3612539552332876e-05, "loss": 6.9841, "step": 23390 }, { "epoch": 0.025220680958385876, "grad_norm": 0.06190294772386551, "learning_rate": 2.3588094186287637e-05, "loss": 6.9744, "step": 23400 }, { "epoch": 0.025472887767969736, "grad_norm": 0.06773626804351807, "learning_rate": 2.3563653280549785e-05, "loss": 6.9744, "step": 23410 }, { "epoch": 0.025725094577553596, "grad_norm": 0.06910261511802673, "learning_rate": 2.353921685212114e-05, "loss": 6.9789, "step": 23420 }, { "epoch": 0.025977301387137452, "grad_norm": 0.06589729338884354, "learning_rate": 2.3514784918000373e-05, "loss": 6.9754, "step": 23430 }, { "epoch": 0.02622950819672131, "grad_norm": 0.07712016254663467, "learning_rate": 2.349035749518305e-05, "loss": 6.9859, "step": 23440 }, { "epoch": 0.02648171500630517, "grad_norm": 0.07989435642957687, "learning_rate": 2.3465934600661598e-05, "loss": 6.9888, "step": 23450 }, { "epoch": 0.02673392181588903, "grad_norm": 0.07752510160207748, "learning_rate": 2.3441516251425284e-05, "loss": 6.9895, "step": 23460 }, { "epoch": 0.026986128625472887, "grad_norm": 0.0733417198061943, "learning_rate": 2.3417102464460222e-05, "loss": 6.9857, "step": 23470 }, { "epoch": 0.027238335435056747, "grad_norm": 0.05999869480729103, "learning_rate": 2.339269325674934e-05, "loss": 6.9734, "step": 23480 }, { "epoch": 0.027490542244640606, "grad_norm": 0.061577048152685165, "learning_rate": 2.3368288645272415e-05, "loss": 6.9741, "step": 23490 }, { "epoch": 0.027742749054224466, "grad_norm": 0.059564318507909775, "learning_rate": 2.3343888647005967e-05, "loss": 6.9722, "step": 23500 }, { "epoch": 0.027994955863808322, "grad_norm": 0.08104056119918823, "learning_rate": 2.3319493278923372e-05, "loss": 6.9782, "step": 23510 }, { "epoch": 0.02824716267339218, "grad_norm": 0.06525153666734695, "learning_rate": 2.3295102557994733e-05, "loss": 6.9805, "step": 23520 }, { "epoch": 0.02849936948297604, "grad_norm": 0.06942378729581833, "learning_rate": 2.327071650118696e-05, "loss": 6.9789, "step": 23530 }, { "epoch": 0.0287515762925599, "grad_norm": 0.07159280776977539, "learning_rate": 2.3246335125463686e-05, "loss": 6.9853, "step": 23540 }, { "epoch": 0.029003783102143757, "grad_norm": 0.07989176362752914, "learning_rate": 2.3221958447785313e-05, "loss": 6.9824, "step": 23550 }, { "epoch": 0.029255989911727617, "grad_norm": 0.08408384770154953, "learning_rate": 2.3197586485108957e-05, "loss": 6.9877, "step": 23560 }, { "epoch": 0.029508196721311476, "grad_norm": 0.07714583724737167, "learning_rate": 2.317321925438846e-05, "loss": 6.9894, "step": 23570 }, { "epoch": 0.029760403530895336, "grad_norm": 0.0808466374874115, "learning_rate": 2.3148856772574378e-05, "loss": 6.9905, "step": 23580 }, { "epoch": 0.030012610340479192, "grad_norm": 0.08379368484020233, "learning_rate": 2.3124499056613953e-05, "loss": 6.9969, "step": 23590 }, { "epoch": 0.03026481715006305, "grad_norm": 0.08464210480451584, "learning_rate": 2.310014612345113e-05, "loss": 6.9977, "step": 23600 }, { "epoch": 0.00025220680958385876, "grad_norm": 0.07315165549516678, "learning_rate": 2.307579799002649e-05, "loss": 6.9854, "step": 23610 }, { "epoch": 0.0005044136191677175, "grad_norm": 0.07485118508338928, "learning_rate": 2.305145467327734e-05, "loss": 6.9833, "step": 23620 }, { "epoch": 0.0007566204287515763, "grad_norm": 0.06995508819818497, "learning_rate": 2.302711619013755e-05, "loss": 6.9857, "step": 23630 }, { "epoch": 0.001008827238335435, "grad_norm": 0.07604967802762985, "learning_rate": 2.3002782557537713e-05, "loss": 6.9866, "step": 23640 }, { "epoch": 0.0012610340479192938, "grad_norm": 0.06960346549749374, "learning_rate": 2.2978453792404982e-05, "loss": 6.987, "step": 23650 }, { "epoch": 0.0015132408575031526, "grad_norm": 0.06684451550245285, "learning_rate": 2.2954129911663167e-05, "loss": 6.9795, "step": 23660 }, { "epoch": 0.0017654476670870113, "grad_norm": 0.06092686206102371, "learning_rate": 2.292981093223265e-05, "loss": 6.9777, "step": 23670 }, { "epoch": 0.00201765447667087, "grad_norm": 0.06247374042868614, "learning_rate": 2.2905496871030436e-05, "loss": 6.9745, "step": 23680 }, { "epoch": 0.002269861286254729, "grad_norm": 0.0634429082274437, "learning_rate": 2.2881187744970076e-05, "loss": 6.979, "step": 23690 }, { "epoch": 0.0025220680958385876, "grad_norm": 0.07756639271974564, "learning_rate": 2.28568835709617e-05, "loss": 6.9776, "step": 23700 }, { "epoch": 0.0027742749054224464, "grad_norm": 0.07502856850624084, "learning_rate": 2.2832584365912008e-05, "loss": 6.9849, "step": 23710 }, { "epoch": 0.003026481715006305, "grad_norm": 0.05630791187286377, "learning_rate": 2.2808290146724212e-05, "loss": 6.9806, "step": 23720 }, { "epoch": 0.003278688524590164, "grad_norm": 0.07453564554452896, "learning_rate": 2.27840009302981e-05, "loss": 6.981, "step": 23730 }, { "epoch": 0.0035308953341740227, "grad_norm": 0.08062916249036789, "learning_rate": 2.275971673352992e-05, "loss": 6.9886, "step": 23740 }, { "epoch": 0.0037831021437578815, "grad_norm": 0.08117837458848953, "learning_rate": 2.273543757331251e-05, "loss": 6.9905, "step": 23750 }, { "epoch": 0.00403530895334174, "grad_norm": 0.07628142833709717, "learning_rate": 2.2711163466535104e-05, "loss": 6.9882, "step": 23760 }, { "epoch": 0.004287515762925599, "grad_norm": 0.08234953880310059, "learning_rate": 2.2686894430083518e-05, "loss": 6.9909, "step": 23770 }, { "epoch": 0.004539722572509458, "grad_norm": 0.06468982249498367, "learning_rate": 2.266263048083997e-05, "loss": 6.9817, "step": 23780 }, { "epoch": 0.0047919293820933165, "grad_norm": 0.05753365531563759, "learning_rate": 2.2638371635683184e-05, "loss": 6.9742, "step": 23790 }, { "epoch": 0.005044136191677175, "grad_norm": 0.06579476594924927, "learning_rate": 2.26141179114883e-05, "loss": 6.9771, "step": 23800 }, { "epoch": 0.005296343001261034, "grad_norm": 0.06465059518814087, "learning_rate": 2.2589869325126916e-05, "loss": 6.9777, "step": 23810 }, { "epoch": 0.005548549810844893, "grad_norm": 0.05708220973610878, "learning_rate": 2.2565625893467054e-05, "loss": 6.9719, "step": 23820 }, { "epoch": 0.005800756620428752, "grad_norm": 0.059325072914361954, "learning_rate": 2.254138763337314e-05, "loss": 6.9703, "step": 23830 }, { "epoch": 0.00605296343001261, "grad_norm": 0.04346957430243492, "learning_rate": 2.251715456170602e-05, "loss": 6.9707, "step": 23840 }, { "epoch": 0.006305170239596469, "grad_norm": 0.06862923502922058, "learning_rate": 2.249292669532291e-05, "loss": 6.977, "step": 23850 }, { "epoch": 0.006557377049180328, "grad_norm": 0.06626904755830765, "learning_rate": 2.246870405107743e-05, "loss": 6.9803, "step": 23860 }, { "epoch": 0.006809583858764187, "grad_norm": 0.02972029149532318, "learning_rate": 2.2444486645819525e-05, "loss": 6.9621, "step": 23870 }, { "epoch": 0.007061790668348045, "grad_norm": 0.05259213224053383, "learning_rate": 2.2420274496395563e-05, "loss": 6.9571, "step": 23880 }, { "epoch": 0.007313997477931904, "grad_norm": 0.07120011001825333, "learning_rate": 2.239606761964818e-05, "loss": 6.9788, "step": 23890 }, { "epoch": 0.007566204287515763, "grad_norm": 0.07727798819541931, "learning_rate": 2.2371866032416407e-05, "loss": 6.9885, "step": 23900 }, { "epoch": 0.007818411097099623, "grad_norm": 0.0775180384516716, "learning_rate": 2.234766975153555e-05, "loss": 6.9853, "step": 23910 }, { "epoch": 0.00807061790668348, "grad_norm": 0.0884319618344307, "learning_rate": 2.2323478793837263e-05, "loss": 6.9855, "step": 23920 }, { "epoch": 0.00832282471626734, "grad_norm": 0.06589823216199875, "learning_rate": 2.229929317614946e-05, "loss": 6.9801, "step": 23930 }, { "epoch": 0.008575031525851198, "grad_norm": 0.07254309207201004, "learning_rate": 2.2275112915296358e-05, "loss": 6.9797, "step": 23940 }, { "epoch": 0.008827238335435058, "grad_norm": 0.06493329256772995, "learning_rate": 2.2250938028098456e-05, "loss": 6.9826, "step": 23950 }, { "epoch": 0.009079445145018916, "grad_norm": 0.06739312410354614, "learning_rate": 2.2226768531372497e-05, "loss": 6.9845, "step": 23960 }, { "epoch": 0.009331651954602775, "grad_norm": 0.06596577912569046, "learning_rate": 2.2202604441931486e-05, "loss": 6.9816, "step": 23970 }, { "epoch": 0.009583858764186633, "grad_norm": 0.07551447302103043, "learning_rate": 2.2178445776584665e-05, "loss": 6.9817, "step": 23980 }, { "epoch": 0.009836065573770493, "grad_norm": 0.07892466336488724, "learning_rate": 2.2154292552137506e-05, "loss": 6.9851, "step": 23990 }, { "epoch": 0.01008827238335435, "grad_norm": 0.07690194249153137, "learning_rate": 2.2130144785391667e-05, "loss": 6.9854, "step": 24000 }, { "epoch": 0.01034047919293821, "grad_norm": 0.07524290680885315, "learning_rate": 2.2106002493145076e-05, "loss": 6.985, "step": 24010 }, { "epoch": 0.010592686002522068, "grad_norm": 0.06765297055244446, "learning_rate": 2.2081865692191767e-05, "loss": 6.9843, "step": 24020 }, { "epoch": 0.010844892812105928, "grad_norm": 0.052009522914886475, "learning_rate": 2.2057734399322035e-05, "loss": 6.9599, "step": 24030 }, { "epoch": 0.011097099621689786, "grad_norm": 0.06842959672212601, "learning_rate": 2.203360863132228e-05, "loss": 6.973, "step": 24040 }, { "epoch": 0.011349306431273645, "grad_norm": 0.07335340231657028, "learning_rate": 2.2009488404975102e-05, "loss": 6.9796, "step": 24050 }, { "epoch": 0.011601513240857503, "grad_norm": 0.06770545989274979, "learning_rate": 2.1985373737059218e-05, "loss": 6.9801, "step": 24060 }, { "epoch": 0.011853720050441363, "grad_norm": 0.07448172569274902, "learning_rate": 2.196126464434949e-05, "loss": 6.9822, "step": 24070 }, { "epoch": 0.01210592686002522, "grad_norm": 0.07353568077087402, "learning_rate": 2.1937161143616914e-05, "loss": 6.9864, "step": 24080 }, { "epoch": 0.01235813366960908, "grad_norm": 0.06601658463478088, "learning_rate": 2.191306325162856e-05, "loss": 6.9856, "step": 24090 }, { "epoch": 0.012610340479192938, "grad_norm": 0.06698822975158691, "learning_rate": 2.1888970985147644e-05, "loss": 6.9835, "step": 24100 }, { "epoch": 0.012862547288776798, "grad_norm": 0.06541674584150314, "learning_rate": 2.1864884360933428e-05, "loss": 6.9791, "step": 24110 }, { "epoch": 0.013114754098360656, "grad_norm": 0.07407241314649582, "learning_rate": 2.1840803395741275e-05, "loss": 6.9828, "step": 24120 }, { "epoch": 0.013366960907944515, "grad_norm": 0.0679241418838501, "learning_rate": 2.1816728106322585e-05, "loss": 6.9794, "step": 24130 }, { "epoch": 0.013619167717528373, "grad_norm": 0.06130746379494667, "learning_rate": 2.1792658509424856e-05, "loss": 6.9774, "step": 24140 }, { "epoch": 0.013871374527112233, "grad_norm": 0.07252240926027298, "learning_rate": 2.1768594621791566e-05, "loss": 6.978, "step": 24150 }, { "epoch": 0.01412358133669609, "grad_norm": 0.07183673232793808, "learning_rate": 2.1744536460162277e-05, "loss": 6.9836, "step": 24160 }, { "epoch": 0.01437578814627995, "grad_norm": 0.07030614465475082, "learning_rate": 2.1720484041272537e-05, "loss": 6.9843, "step": 24170 }, { "epoch": 0.014627994955863808, "grad_norm": 0.07638239115476608, "learning_rate": 2.1696437381853883e-05, "loss": 6.9875, "step": 24180 }, { "epoch": 0.014880201765447668, "grad_norm": 0.06827562302350998, "learning_rate": 2.1672396498633898e-05, "loss": 6.9819, "step": 24190 }, { "epoch": 0.015132408575031526, "grad_norm": 0.07848426699638367, "learning_rate": 2.1648361408336096e-05, "loss": 6.9838, "step": 24200 }, { "epoch": 0.015384615384615385, "grad_norm": 0.07337295264005661, "learning_rate": 2.1624332127679995e-05, "loss": 6.9861, "step": 24210 }, { "epoch": 0.015636822194199245, "grad_norm": 0.05103433504700661, "learning_rate": 2.160030867338105e-05, "loss": 6.9697, "step": 24220 }, { "epoch": 0.0158890290037831, "grad_norm": 0.06744540482759476, "learning_rate": 2.1576291062150684e-05, "loss": 6.9739, "step": 24230 }, { "epoch": 0.01614123581336696, "grad_norm": 0.06490875035524368, "learning_rate": 2.1552279310696225e-05, "loss": 6.9768, "step": 24240 }, { "epoch": 0.01639344262295082, "grad_norm": 0.06405938416719437, "learning_rate": 2.152827343572096e-05, "loss": 6.9745, "step": 24250 }, { "epoch": 0.01664564943253468, "grad_norm": 0.0732630118727684, "learning_rate": 2.1504273453924048e-05, "loss": 6.9831, "step": 24260 }, { "epoch": 0.016897856242118536, "grad_norm": 0.07431436330080032, "learning_rate": 2.1480279382000605e-05, "loss": 6.983, "step": 24270 }, { "epoch": 0.017150063051702396, "grad_norm": 0.0684107169508934, "learning_rate": 2.1456291236641566e-05, "loss": 6.9788, "step": 24280 }, { "epoch": 0.017402269861286256, "grad_norm": 0.061650846153497696, "learning_rate": 2.1432309034533807e-05, "loss": 6.9761, "step": 24290 }, { "epoch": 0.017654476670870115, "grad_norm": 0.06949859112501144, "learning_rate": 2.1408332792360044e-05, "loss": 6.977, "step": 24300 }, { "epoch": 0.01790668348045397, "grad_norm": 0.05585968494415283, "learning_rate": 2.138436252679881e-05, "loss": 6.9769, "step": 24310 }, { "epoch": 0.01815889029003783, "grad_norm": 0.060192253440618515, "learning_rate": 2.136039825452456e-05, "loss": 6.9719, "step": 24320 }, { "epoch": 0.01841109709962169, "grad_norm": 0.05424096807837486, "learning_rate": 2.1336439992207504e-05, "loss": 6.9688, "step": 24330 }, { "epoch": 0.01866330390920555, "grad_norm": 0.05857578292489052, "learning_rate": 2.1312487756513722e-05, "loss": 6.9691, "step": 24340 }, { "epoch": 0.018915510718789406, "grad_norm": 0.06844812631607056, "learning_rate": 2.1288541564105068e-05, "loss": 6.9762, "step": 24350 }, { "epoch": 0.019167717528373266, "grad_norm": 0.07820528745651245, "learning_rate": 2.1264601431639227e-05, "loss": 6.9786, "step": 24360 }, { "epoch": 0.019419924337957126, "grad_norm": 0.07472148537635803, "learning_rate": 2.1240667375769627e-05, "loss": 6.9831, "step": 24370 }, { "epoch": 0.019672131147540985, "grad_norm": 0.07436320185661316, "learning_rate": 2.1216739413145507e-05, "loss": 6.9879, "step": 24380 }, { "epoch": 0.01992433795712484, "grad_norm": 0.081243135035038, "learning_rate": 2.1192817560411832e-05, "loss": 6.9878, "step": 24390 }, { "epoch": 0.0201765447667087, "grad_norm": 0.07967602461576462, "learning_rate": 2.1168901834209366e-05, "loss": 6.9875, "step": 24400 }, { "epoch": 0.02042875157629256, "grad_norm": 0.07883425801992416, "learning_rate": 2.1144992251174542e-05, "loss": 6.989, "step": 24410 }, { "epoch": 0.02068095838587642, "grad_norm": 0.07577212899923325, "learning_rate": 2.1121088827939595e-05, "loss": 6.9882, "step": 24420 }, { "epoch": 0.020933165195460277, "grad_norm": 0.07569920271635056, "learning_rate": 2.1097191581132426e-05, "loss": 6.9858, "step": 24430 }, { "epoch": 0.021185372005044136, "grad_norm": 0.0709572359919548, "learning_rate": 2.107330052737663e-05, "loss": 6.9853, "step": 24440 }, { "epoch": 0.021437578814627996, "grad_norm": 0.0638098195195198, "learning_rate": 2.1049415683291548e-05, "loss": 6.9823, "step": 24450 }, { "epoch": 0.021689785624211855, "grad_norm": 0.07253843545913696, "learning_rate": 2.102553706549215e-05, "loss": 6.9848, "step": 24460 }, { "epoch": 0.02194199243379571, "grad_norm": 0.06196874752640724, "learning_rate": 2.1001664690589107e-05, "loss": 6.9773, "step": 24470 }, { "epoch": 0.02219419924337957, "grad_norm": 0.07167524844408035, "learning_rate": 2.097779857518872e-05, "loss": 6.9793, "step": 24480 }, { "epoch": 0.02244640605296343, "grad_norm": 0.05791318789124489, "learning_rate": 2.095393873589296e-05, "loss": 6.9766, "step": 24490 }, { "epoch": 0.02269861286254729, "grad_norm": 0.06368162482976913, "learning_rate": 2.0930085189299416e-05, "loss": 6.9744, "step": 24500 }, { "epoch": 0.022950819672131147, "grad_norm": 0.06275410205125809, "learning_rate": 2.0906237952001317e-05, "loss": 6.9738, "step": 24510 }, { "epoch": 0.023203026481715006, "grad_norm": 0.05429702624678612, "learning_rate": 2.088239704058747e-05, "loss": 6.9721, "step": 24520 }, { "epoch": 0.023455233291298866, "grad_norm": 0.05957522988319397, "learning_rate": 2.0858562471642338e-05, "loss": 6.9706, "step": 24530 }, { "epoch": 0.023707440100882726, "grad_norm": 0.05424430966377258, "learning_rate": 2.0834734261745912e-05, "loss": 6.974, "step": 24540 }, { "epoch": 0.02395964691046658, "grad_norm": 0.06851207464933395, "learning_rate": 2.0810912427473775e-05, "loss": 6.9782, "step": 24550 }, { "epoch": 0.02421185372005044, "grad_norm": 0.078624427318573, "learning_rate": 2.0787096985397123e-05, "loss": 6.9853, "step": 24560 }, { "epoch": 0.0244640605296343, "grad_norm": 0.074544757604599, "learning_rate": 2.0763287952082623e-05, "loss": 6.9852, "step": 24570 }, { "epoch": 0.02471626733921816, "grad_norm": 0.0786362886428833, "learning_rate": 2.0739485344092565e-05, "loss": 6.9881, "step": 24580 }, { "epoch": 0.024968474148802017, "grad_norm": 0.0778612494468689, "learning_rate": 2.071568917798471e-05, "loss": 6.9885, "step": 24590 }, { "epoch": 0.025220680958385876, "grad_norm": 0.06664574891328812, "learning_rate": 2.0691899470312372e-05, "loss": 6.9839, "step": 24600 }, { "epoch": 0.025472887767969736, "grad_norm": 0.06601030379533768, "learning_rate": 2.0668116237624356e-05, "loss": 6.9797, "step": 24610 }, { "epoch": 0.025725094577553596, "grad_norm": 0.07540690153837204, "learning_rate": 2.0644339496464965e-05, "loss": 6.9802, "step": 24620 }, { "epoch": 0.025977301387137452, "grad_norm": 0.04516175761818886, "learning_rate": 2.0620569263373994e-05, "loss": 6.9644, "step": 24630 }, { "epoch": 0.02622950819672131, "grad_norm": 0.05562988296151161, "learning_rate": 2.0596805554886702e-05, "loss": 6.9634, "step": 24640 }, { "epoch": 0.02648171500630517, "grad_norm": 0.06311940401792526, "learning_rate": 2.0573048387533805e-05, "loss": 6.971, "step": 24650 }, { "epoch": 0.02673392181588903, "grad_norm": 0.06637302786111832, "learning_rate": 2.0549297777841508e-05, "loss": 6.9788, "step": 24660 }, { "epoch": 0.026986128625472887, "grad_norm": 0.06799791008234024, "learning_rate": 2.052555374233139e-05, "loss": 6.9796, "step": 24670 }, { "epoch": 0.027238335435056747, "grad_norm": 0.06349587440490723, "learning_rate": 2.0501816297520493e-05, "loss": 6.9761, "step": 24680 }, { "epoch": 0.027490542244640606, "grad_norm": 0.06618224829435349, "learning_rate": 2.047808545992129e-05, "loss": 6.9755, "step": 24690 }, { "epoch": 0.027742749054224466, "grad_norm": 0.07013630867004395, "learning_rate": 2.0454361246041613e-05, "loss": 6.9802, "step": 24700 }, { "epoch": 0.027994955863808322, "grad_norm": 0.07548052072525024, "learning_rate": 2.043064367238474e-05, "loss": 6.9816, "step": 24710 }, { "epoch": 0.02824716267339218, "grad_norm": 0.07275006920099258, "learning_rate": 2.0406932755449282e-05, "loss": 6.9826, "step": 24720 }, { "epoch": 0.02849936948297604, "grad_norm": 0.066709965467453, "learning_rate": 2.0383228511729258e-05, "loss": 6.9777, "step": 24730 }, { "epoch": 0.0287515762925599, "grad_norm": 0.07263965159654617, "learning_rate": 2.0359530957714006e-05, "loss": 6.9799, "step": 24740 }, { "epoch": 0.029003783102143757, "grad_norm": 0.06758581846952438, "learning_rate": 2.0335840109888246e-05, "loss": 6.9836, "step": 24750 }, { "epoch": 0.029255989911727617, "grad_norm": 0.0805225670337677, "learning_rate": 2.0312155984732006e-05, "loss": 6.9865, "step": 24760 }, { "epoch": 0.029508196721311476, "grad_norm": 0.06532759219408035, "learning_rate": 2.028847859872066e-05, "loss": 6.9839, "step": 24770 }, { "epoch": 0.029760403530895336, "grad_norm": 0.05922757834196091, "learning_rate": 2.0264807968324866e-05, "loss": 6.9761, "step": 24780 }, { "epoch": 0.030012610340479192, "grad_norm": 0.06934261322021484, "learning_rate": 2.0241144110010636e-05, "loss": 6.9766, "step": 24790 }, { "epoch": 0.03026481715006305, "grad_norm": 0.06737423688173294, "learning_rate": 2.0217487040239203e-05, "loss": 6.9773, "step": 24800 }, { "epoch": 0.03051702395964691, "grad_norm": 0.07552351802587509, "learning_rate": 2.0193836775467108e-05, "loss": 6.9835, "step": 24810 }, { "epoch": 0.03076923076923077, "grad_norm": 0.07695738226175308, "learning_rate": 2.017019333214619e-05, "loss": 6.9873, "step": 24820 }, { "epoch": 0.031021437578814627, "grad_norm": 0.07906481623649597, "learning_rate": 2.0146556726723475e-05, "loss": 6.9882, "step": 24830 }, { "epoch": 0.03127364438839849, "grad_norm": 0.0792229175567627, "learning_rate": 2.01229269756413e-05, "loss": 6.9878, "step": 24840 }, { "epoch": 0.031525851197982346, "grad_norm": 0.060449011623859406, "learning_rate": 2.0099304095337197e-05, "loss": 6.9806, "step": 24850 }, { "epoch": 0.0317780580075662, "grad_norm": 0.06484956294298172, "learning_rate": 2.0075688102243923e-05, "loss": 6.974, "step": 24860 }, { "epoch": 0.032030264817150066, "grad_norm": 0.06320662051439285, "learning_rate": 2.005207901278944e-05, "loss": 6.9751, "step": 24870 }, { "epoch": 0.03228247162673392, "grad_norm": 0.0659361407160759, "learning_rate": 2.0028476843396927e-05, "loss": 6.9757, "step": 24880 }, { "epoch": 0.03253467843631778, "grad_norm": 0.07731663435697556, "learning_rate": 2.0004881610484723e-05, "loss": 6.9826, "step": 24890 }, { "epoch": 0.03278688524590164, "grad_norm": 0.07246169447898865, "learning_rate": 1.9981293330466373e-05, "loss": 6.9852, "step": 24900 }, { "epoch": 0.0330390920554855, "grad_norm": 0.0701514258980751, "learning_rate": 1.9957712019750555e-05, "loss": 6.984, "step": 24910 }, { "epoch": 0.03329129886506936, "grad_norm": 0.07286476343870163, "learning_rate": 1.993413769474111e-05, "loss": 6.984, "step": 24920 }, { "epoch": 0.033543505674653216, "grad_norm": 0.06203853338956833, "learning_rate": 1.9910570371837032e-05, "loss": 6.9768, "step": 24930 }, { "epoch": 0.03379571248423707, "grad_norm": 0.054161012172698975, "learning_rate": 1.988701006743241e-05, "loss": 6.9724, "step": 24940 }, { "epoch": 0.034047919293820936, "grad_norm": 0.06525396555662155, "learning_rate": 1.986345679791652e-05, "loss": 6.9727, "step": 24950 }, { "epoch": 0.03430012610340479, "grad_norm": 0.06806611269712448, "learning_rate": 1.9839910579673642e-05, "loss": 6.981, "step": 24960 }, { "epoch": 0.03455233291298865, "grad_norm": 0.06163162738084793, "learning_rate": 1.9816371429083248e-05, "loss": 6.9801, "step": 24970 }, { "epoch": 0.03480453972257251, "grad_norm": 0.06143777817487717, "learning_rate": 1.9792839362519837e-05, "loss": 6.9781, "step": 24980 }, { "epoch": 0.03505674653215637, "grad_norm": 0.06516889482736588, "learning_rate": 1.9769314396353007e-05, "loss": 6.9793, "step": 24990 }, { "epoch": 0.03530895334174023, "grad_norm": 0.06896864622831345, "learning_rate": 1.9745796546947388e-05, "loss": 6.9808, "step": 25000 }, { "epoch": 0.03556116015132409, "grad_norm": 0.06831832975149155, "learning_rate": 1.9722285830662695e-05, "loss": 6.9794, "step": 25010 }, { "epoch": 0.03581336696090794, "grad_norm": 0.06576088070869446, "learning_rate": 1.969878226385365e-05, "loss": 6.9794, "step": 25020 }, { "epoch": 0.036065573770491806, "grad_norm": 0.06546693295240402, "learning_rate": 1.9675285862870027e-05, "loss": 6.9774, "step": 25030 }, { "epoch": 0.03631778058007566, "grad_norm": 0.05822691321372986, "learning_rate": 1.965179664405659e-05, "loss": 6.9808, "step": 25040 }, { "epoch": 0.03656998738965952, "grad_norm": 0.05654827132821083, "learning_rate": 1.962831462375313e-05, "loss": 6.9606, "step": 25050 }, { "epoch": 0.03682219419924338, "grad_norm": 0.06039641052484512, "learning_rate": 1.9604839818294413e-05, "loss": 6.9721, "step": 25060 }, { "epoch": 0.03707440100882724, "grad_norm": 0.058862946927547455, "learning_rate": 1.958137224401019e-05, "loss": 6.9713, "step": 25070 }, { "epoch": 0.0373266078184111, "grad_norm": 0.061000559478998184, "learning_rate": 1.955791191722521e-05, "loss": 6.9704, "step": 25080 }, { "epoch": 0.03757881462799496, "grad_norm": 0.059897247701883316, "learning_rate": 1.9534458854259123e-05, "loss": 6.9736, "step": 25090 }, { "epoch": 0.03783102143757881, "grad_norm": 0.05319308489561081, "learning_rate": 1.9511013071426584e-05, "loss": 6.9689, "step": 25100 }, { "epoch": 0.038083228247162676, "grad_norm": 0.07312078028917313, "learning_rate": 1.948757458503714e-05, "loss": 6.9776, "step": 25110 }, { "epoch": 0.03833543505674653, "grad_norm": 0.06450211256742477, "learning_rate": 1.94641434113953e-05, "loss": 6.9777, "step": 25120 }, { "epoch": 0.03858764186633039, "grad_norm": 0.06483352929353714, "learning_rate": 1.9440719566800452e-05, "loss": 6.9792, "step": 25130 }, { "epoch": 0.03883984867591425, "grad_norm": 0.07303968816995621, "learning_rate": 1.9417303067546913e-05, "loss": 6.984, "step": 25140 }, { "epoch": 0.03909205548549811, "grad_norm": 0.050947126001119614, "learning_rate": 1.9393893929923857e-05, "loss": 6.9749, "step": 25150 }, { "epoch": 0.03934426229508197, "grad_norm": 0.07237696647644043, "learning_rate": 1.9370492170215382e-05, "loss": 6.9778, "step": 25160 }, { "epoch": 0.03959646910466583, "grad_norm": 0.07180490344762802, "learning_rate": 1.934709780470042e-05, "loss": 6.9823, "step": 25170 }, { "epoch": 0.03984867591424968, "grad_norm": 0.07105462998151779, "learning_rate": 1.9323710849652766e-05, "loss": 6.9864, "step": 25180 }, { "epoch": 0.040100882723833546, "grad_norm": 0.06334982067346573, "learning_rate": 1.9300331321341065e-05, "loss": 6.9827, "step": 25190 }, { "epoch": 0.0403530895334174, "grad_norm": 0.06732272356748581, "learning_rate": 1.927695923602879e-05, "loss": 6.9819, "step": 25200 }, { "epoch": 0.04060529634300126, "grad_norm": 0.07368966937065125, "learning_rate": 1.925359460997426e-05, "loss": 6.9824, "step": 25210 }, { "epoch": 0.04085750315258512, "grad_norm": 0.071007639169693, "learning_rate": 1.9230237459430554e-05, "loss": 6.986, "step": 25220 }, { "epoch": 0.04110970996216898, "grad_norm": 0.07539627701044083, "learning_rate": 1.9206887800645613e-05, "loss": 6.9808, "step": 25230 }, { "epoch": 0.04136191677175284, "grad_norm": 0.06390125304460526, "learning_rate": 1.9183545649862112e-05, "loss": 6.9789, "step": 25240 }, { "epoch": 0.0416141235813367, "grad_norm": 0.07195311039686203, "learning_rate": 1.916021102331755e-05, "loss": 6.9818, "step": 25250 }, { "epoch": 0.04186633039092055, "grad_norm": 0.07609806209802628, "learning_rate": 1.913688393724415e-05, "loss": 6.9879, "step": 25260 }, { "epoch": 0.042118537200504416, "grad_norm": 0.07127057760953903, "learning_rate": 1.9113564407868923e-05, "loss": 6.9845, "step": 25270 }, { "epoch": 0.04237074401008827, "grad_norm": 0.07327739149332047, "learning_rate": 1.9090252451413596e-05, "loss": 6.984, "step": 25280 }, { "epoch": 0.04262295081967213, "grad_norm": 0.07094857096672058, "learning_rate": 1.9066948084094644e-05, "loss": 6.9829, "step": 25290 }, { "epoch": 0.04287515762925599, "grad_norm": 0.07330840080976486, "learning_rate": 1.904365132212327e-05, "loss": 6.985, "step": 25300 }, { "epoch": 0.04312736443883985, "grad_norm": 0.0391412228345871, "learning_rate": 1.902036218170536e-05, "loss": 6.9614, "step": 25310 }, { "epoch": 0.04337957124842371, "grad_norm": 0.060993071645498276, "learning_rate": 1.8997080679041533e-05, "loss": 6.9653, "step": 25320 }, { "epoch": 0.04363177805800757, "grad_norm": 0.07027994841337204, "learning_rate": 1.8973806830327053e-05, "loss": 6.9754, "step": 25330 }, { "epoch": 0.04388398486759142, "grad_norm": 0.07121443003416061, "learning_rate": 1.8950540651751914e-05, "loss": 6.9817, "step": 25340 }, { "epoch": 0.044136191677175286, "grad_norm": 0.0706179216504097, "learning_rate": 1.892728215950071e-05, "loss": 6.9846, "step": 25350 }, { "epoch": 0.04438839848675914, "grad_norm": 0.06499464064836502, "learning_rate": 1.8904031369752753e-05, "loss": 6.9753, "step": 25360 }, { "epoch": 0.044640605296343, "grad_norm": 0.05636989325284958, "learning_rate": 1.8880788298681948e-05, "loss": 6.9711, "step": 25370 }, { "epoch": 0.04489281210592686, "grad_norm": 0.06047135218977928, "learning_rate": 1.8857552962456852e-05, "loss": 6.9734, "step": 25380 }, { "epoch": 0.04514501891551072, "grad_norm": 0.07309477776288986, "learning_rate": 1.8834325377240636e-05, "loss": 6.9814, "step": 25390 }, { "epoch": 0.04539722572509458, "grad_norm": 0.06777913123369217, "learning_rate": 1.8811105559191088e-05, "loss": 6.9814, "step": 25400 }, { "epoch": 0.04564943253467844, "grad_norm": 0.07253231853246689, "learning_rate": 1.8787893524460586e-05, "loss": 6.9818, "step": 25410 }, { "epoch": 0.04590163934426229, "grad_norm": 0.056680794805288315, "learning_rate": 1.8764689289196076e-05, "loss": 6.9737, "step": 25420 }, { "epoch": 0.046153846153846156, "grad_norm": 0.06195201724767685, "learning_rate": 1.8741492869539116e-05, "loss": 6.9725, "step": 25430 }, { "epoch": 0.04640605296343001, "grad_norm": 0.059349123388528824, "learning_rate": 1.8718304281625798e-05, "loss": 6.9735, "step": 25440 }, { "epoch": 0.04665825977301387, "grad_norm": 0.07708489149808884, "learning_rate": 1.869512354158678e-05, "loss": 6.9838, "step": 25450 }, { "epoch": 0.04691046658259773, "grad_norm": 0.0727149248123169, "learning_rate": 1.867195066554724e-05, "loss": 6.984, "step": 25460 }, { "epoch": 0.04716267339218159, "grad_norm": 0.07370393723249435, "learning_rate": 1.864878566962694e-05, "loss": 6.9831, "step": 25470 }, { "epoch": 0.04741488020176545, "grad_norm": 0.075920470058918, "learning_rate": 1.862562856994007e-05, "loss": 6.9849, "step": 25480 }, { "epoch": 0.04766708701134931, "grad_norm": 0.07265771180391312, "learning_rate": 1.860247938259543e-05, "loss": 6.9845, "step": 25490 }, { "epoch": 0.04791929382093316, "grad_norm": 0.07174354046583176, "learning_rate": 1.8579338123696227e-05, "loss": 6.9845, "step": 25500 }, { "epoch": 0.048171500630517027, "grad_norm": 0.07087887078523636, "learning_rate": 1.8556204809340216e-05, "loss": 6.9829, "step": 25510 }, { "epoch": 0.04842370744010088, "grad_norm": 0.0735405683517456, "learning_rate": 1.8533079455619588e-05, "loss": 6.9829, "step": 25520 }, { "epoch": 0.04867591424968474, "grad_norm": 0.05709761753678322, "learning_rate": 1.850996207862101e-05, "loss": 6.9773, "step": 25530 }, { "epoch": 0.0489281210592686, "grad_norm": 0.06410641223192215, "learning_rate": 1.8486852694425606e-05, "loss": 6.9701, "step": 25540 }, { "epoch": 0.04918032786885246, "grad_norm": 0.06007826700806618, "learning_rate": 1.846375131910892e-05, "loss": 6.967, "step": 25550 }, { "epoch": 0.04943253467843632, "grad_norm": 0.058583617210388184, "learning_rate": 1.8440657968740947e-05, "loss": 6.9711, "step": 25560 }, { "epoch": 0.04968474148802018, "grad_norm": 0.07404392957687378, "learning_rate": 1.8417572659386087e-05, "loss": 6.9754, "step": 25570 }, { "epoch": 0.049936948297604034, "grad_norm": 0.06343639642000198, "learning_rate": 1.8394495407103155e-05, "loss": 6.9806, "step": 25580 }, { "epoch": 0.0501891551071879, "grad_norm": 0.07205548137426376, "learning_rate": 1.837142622794534e-05, "loss": 6.9768, "step": 25590 }, { "epoch": 0.05044136191677175, "grad_norm": 0.06769727915525436, "learning_rate": 1.8348365137960264e-05, "loss": 6.9789, "step": 25600 }, { "epoch": 0.05069356872635561, "grad_norm": 0.06840193271636963, "learning_rate": 1.8325312153189847e-05, "loss": 6.9729, "step": 25610 }, { "epoch": 0.05094577553593947, "grad_norm": 0.07150110602378845, "learning_rate": 1.8302267289670447e-05, "loss": 6.9795, "step": 25620 }, { "epoch": 0.05119798234552333, "grad_norm": 0.057460717856884, "learning_rate": 1.8279230563432725e-05, "loss": 6.9735, "step": 25630 }, { "epoch": 0.05145018915510719, "grad_norm": 0.06490689516067505, "learning_rate": 1.8256201990501696e-05, "loss": 6.9756, "step": 25640 }, { "epoch": 0.05170239596469105, "grad_norm": 0.06580715626478195, "learning_rate": 1.8233181586896703e-05, "loss": 6.9764, "step": 25650 }, { "epoch": 0.051954602774274904, "grad_norm": 0.0629386231303215, "learning_rate": 1.8210169368631404e-05, "loss": 6.9826, "step": 25660 }, { "epoch": 0.05220680958385877, "grad_norm": 0.07685097306966782, "learning_rate": 1.8187165351713767e-05, "loss": 6.9809, "step": 25670 }, { "epoch": 0.05245901639344262, "grad_norm": 0.0723591148853302, "learning_rate": 1.816416955214605e-05, "loss": 6.9771, "step": 25680 }, { "epoch": 0.05271122320302648, "grad_norm": 0.06712544709444046, "learning_rate": 1.81411819859248e-05, "loss": 6.9819, "step": 25690 }, { "epoch": 0.05296343001261034, "grad_norm": 0.037019047886133194, "learning_rate": 1.811820266904082e-05, "loss": 6.9673, "step": 25700 }, { "epoch": 0.0532156368221942, "grad_norm": 0.05820167809724808, "learning_rate": 1.8095231617479216e-05, "loss": 6.9555, "step": 25710 }, { "epoch": 0.05346784363177806, "grad_norm": 0.0602399967610836, "learning_rate": 1.8072268847219283e-05, "loss": 6.9703, "step": 25720 }, { "epoch": 0.05372005044136192, "grad_norm": 0.06991314888000488, "learning_rate": 1.8049314374234625e-05, "loss": 6.9856, "step": 25730 }, { "epoch": 0.053972257250945774, "grad_norm": 0.07089611887931824, "learning_rate": 1.8026368214493005e-05, "loss": 6.9815, "step": 25740 }, { "epoch": 0.05422446406052964, "grad_norm": 0.06648049503564835, "learning_rate": 1.8003430383956464e-05, "loss": 6.9779, "step": 25750 }, { "epoch": 0.05447667087011349, "grad_norm": 0.06833353638648987, "learning_rate": 1.7980500898581202e-05, "loss": 6.9817, "step": 25760 }, { "epoch": 0.05472887767969735, "grad_norm": 0.0736420676112175, "learning_rate": 1.7957579774317647e-05, "loss": 6.9822, "step": 25770 }, { "epoch": 0.05498108448928121, "grad_norm": 0.0734311118721962, "learning_rate": 1.793466702711039e-05, "loss": 6.9832, "step": 25780 }, { "epoch": 0.05523329129886507, "grad_norm": 0.0672043040394783, "learning_rate": 1.7911762672898192e-05, "loss": 6.9813, "step": 25790 }, { "epoch": 0.05548549810844893, "grad_norm": 0.06875050812959671, "learning_rate": 1.7888866727614005e-05, "loss": 6.9802, "step": 25800 }, { "epoch": 0.05573770491803279, "grad_norm": 0.07211349159479141, "learning_rate": 1.7865979207184896e-05, "loss": 6.9847, "step": 25810 }, { "epoch": 0.055989911727616644, "grad_norm": 0.05767359957098961, "learning_rate": 1.784310012753209e-05, "loss": 6.9781, "step": 25820 }, { "epoch": 0.05624211853720051, "grad_norm": 0.06595113128423691, "learning_rate": 1.7820229504570945e-05, "loss": 6.9732, "step": 25830 }, { "epoch": 0.05649432534678436, "grad_norm": 0.06571856886148453, "learning_rate": 1.7797367354210923e-05, "loss": 6.9751, "step": 25840 }, { "epoch": 0.05674653215636822, "grad_norm": 0.06541367620229721, "learning_rate": 1.777451369235559e-05, "loss": 6.9742, "step": 25850 }, { "epoch": 0.05699873896595208, "grad_norm": 0.07471218705177307, "learning_rate": 1.7751668534902642e-05, "loss": 6.9811, "step": 25860 }, { "epoch": 0.05725094577553594, "grad_norm": 0.07443960756063461, "learning_rate": 1.7728831897743794e-05, "loss": 6.9876, "step": 25870 }, { "epoch": 0.0575031525851198, "grad_norm": 0.07341449707746506, "learning_rate": 1.7706003796764904e-05, "loss": 6.9814, "step": 25880 }, { "epoch": 0.05775535939470366, "grad_norm": 0.05876440182328224, "learning_rate": 1.7683184247845845e-05, "loss": 6.9785, "step": 25890 }, { "epoch": 0.058007566204287514, "grad_norm": 0.07106555253267288, "learning_rate": 1.7660373266860563e-05, "loss": 6.9814, "step": 25900 }, { "epoch": 0.05825977301387138, "grad_norm": 0.07030203938484192, "learning_rate": 1.7637570869677038e-05, "loss": 6.9826, "step": 25910 }, { "epoch": 0.05851197982345523, "grad_norm": 0.06946060061454773, "learning_rate": 1.7614777072157263e-05, "loss": 6.9831, "step": 25920 }, { "epoch": 0.05876418663303909, "grad_norm": 0.0730338767170906, "learning_rate": 1.7591991890157284e-05, "loss": 6.9853, "step": 25930 }, { "epoch": 0.05901639344262295, "grad_norm": 0.07550230622291565, "learning_rate": 1.7569215339527115e-05, "loss": 6.9871, "step": 25940 }, { "epoch": 0.05926860025220681, "grad_norm": 0.056133631616830826, "learning_rate": 1.75464474361108e-05, "loss": 6.9776, "step": 25950 }, { "epoch": 0.05952080706179067, "grad_norm": 0.056981030851602554, "learning_rate": 1.752368819574634e-05, "loss": 6.9731, "step": 25960 }, { "epoch": 0.05977301387137453, "grad_norm": 0.05678236484527588, "learning_rate": 1.750093763426573e-05, "loss": 6.9721, "step": 25970 }, { "epoch": 0.060025220680958384, "grad_norm": 0.06571102887392044, "learning_rate": 1.7478195767494908e-05, "loss": 6.9735, "step": 25980 }, { "epoch": 0.06027742749054225, "grad_norm": 0.07281024008989334, "learning_rate": 1.7455462611253793e-05, "loss": 6.9807, "step": 25990 }, { "epoch": 0.0605296343001261, "grad_norm": 0.05410727486014366, "learning_rate": 1.743273818135622e-05, "loss": 6.9697, "step": 26000 }, { "epoch": 0.06078184110970996, "grad_norm": 0.05123651772737503, "learning_rate": 1.7412293668537376e-05, "loss": 6.9678, "step": 26010 }, { "epoch": 0.06103404791929382, "grad_norm": 0.05393272265791893, "learning_rate": 1.7389585862237913e-05, "loss": 6.9695, "step": 26020 }, { "epoch": 0.06128625472887768, "grad_norm": 0.06844967603683472, "learning_rate": 1.736688682810776e-05, "loss": 6.9802, "step": 26030 }, { "epoch": 0.06153846153846154, "grad_norm": 0.07131237536668777, "learning_rate": 1.7344196581937024e-05, "loss": 6.9798, "step": 26040 }, { "epoch": 0.0617906683480454, "grad_norm": 0.06589873880147934, "learning_rate": 1.7321515139509706e-05, "loss": 6.9797, "step": 26050 }, { "epoch": 0.062042875157629254, "grad_norm": 0.06278558820486069, "learning_rate": 1.729884251660367e-05, "loss": 6.98, "step": 26060 }, { "epoch": 0.06229508196721312, "grad_norm": 0.053359683603048325, "learning_rate": 1.7276178728990645e-05, "loss": 6.9704, "step": 26070 }, { "epoch": 0.06254728877679698, "grad_norm": 0.0687716081738472, "learning_rate": 1.725352379243621e-05, "loss": 6.981, "step": 26080 }, { "epoch": 0.06279949558638083, "grad_norm": 0.07420646399259567, "learning_rate": 1.7230877722699822e-05, "loss": 6.9868, "step": 26090 }, { "epoch": 0.06305170239596469, "grad_norm": 0.06809783726930618, "learning_rate": 1.7208240535534706e-05, "loss": 6.9853, "step": 26100 }, { "epoch": 0.06330390920554856, "grad_norm": 0.06682822108268738, "learning_rate": 1.7185612246687988e-05, "loss": 6.9807, "step": 26110 }, { "epoch": 0.0635561160151324, "grad_norm": 0.07530169934034348, "learning_rate": 1.716299287190053e-05, "loss": 6.9848, "step": 26120 }, { "epoch": 0.06380832282471627, "grad_norm": 0.07007043808698654, "learning_rate": 1.7140382426907038e-05, "loss": 6.9851, "step": 26130 }, { "epoch": 0.06406052963430013, "grad_norm": 0.07345443218946457, "learning_rate": 1.7117780927435977e-05, "loss": 6.9852, "step": 26140 }, { "epoch": 0.06431273644388398, "grad_norm": 0.06303220242261887, "learning_rate": 1.709518838920963e-05, "loss": 6.9746, "step": 26150 }, { "epoch": 0.06456494325346784, "grad_norm": 0.059055082499980927, "learning_rate": 1.7072604827944e-05, "loss": 6.9738, "step": 26160 }, { "epoch": 0.0648171500630517, "grad_norm": 0.06054585799574852, "learning_rate": 1.7050030259348874e-05, "loss": 6.9723, "step": 26170 }, { "epoch": 0.06506935687263556, "grad_norm": 0.060022421181201935, "learning_rate": 1.7027464699127775e-05, "loss": 6.972, "step": 26180 }, { "epoch": 0.06532156368221942, "grad_norm": 0.07039963454008102, "learning_rate": 1.7004908162977947e-05, "loss": 6.9794, "step": 26190 }, { "epoch": 0.06557377049180328, "grad_norm": 0.06935133785009384, "learning_rate": 1.6982360666590395e-05, "loss": 6.9819, "step": 26200 }, { "epoch": 0.06582597730138713, "grad_norm": 0.07516192644834518, "learning_rate": 1.6959822225649784e-05, "loss": 6.9846, "step": 26210 }, { "epoch": 0.066078184110971, "grad_norm": 0.06364540010690689, "learning_rate": 1.6937292855834547e-05, "loss": 6.9801, "step": 26220 }, { "epoch": 0.06633039092055486, "grad_norm": 0.05569463223218918, "learning_rate": 1.6914772572816716e-05, "loss": 6.9657, "step": 26230 }, { "epoch": 0.06658259773013872, "grad_norm": 0.06077327951788902, "learning_rate": 1.6892261392262084e-05, "loss": 6.9695, "step": 26240 }, { "epoch": 0.06683480453972257, "grad_norm": 0.05754045024514198, "learning_rate": 1.6869759329830063e-05, "loss": 6.9721, "step": 26250 }, { "epoch": 0.06708701134930643, "grad_norm": 0.06172797456383705, "learning_rate": 1.6847266401173758e-05, "loss": 6.9785, "step": 26260 }, { "epoch": 0.0673392181588903, "grad_norm": 0.065339095890522, "learning_rate": 1.6824782621939894e-05, "loss": 6.9801, "step": 26270 }, { "epoch": 0.06759142496847415, "grad_norm": 0.07162079215049744, "learning_rate": 1.6802308007768836e-05, "loss": 6.9795, "step": 26280 }, { "epoch": 0.06784363177805801, "grad_norm": 0.06605413556098938, "learning_rate": 1.6779842574294576e-05, "loss": 6.9804, "step": 26290 }, { "epoch": 0.06809583858764187, "grad_norm": 0.07883854955434799, "learning_rate": 1.675738633714471e-05, "loss": 6.9842, "step": 26300 }, { "epoch": 0.06834804539722572, "grad_norm": 0.06231728941202164, "learning_rate": 1.673493931194047e-05, "loss": 6.9774, "step": 26310 }, { "epoch": 0.06860025220680958, "grad_norm": 0.07336857914924622, "learning_rate": 1.671250151429665e-05, "loss": 6.9789, "step": 26320 }, { "epoch": 0.06885245901639345, "grad_norm": 0.06971146911382675, "learning_rate": 1.6690072959821623e-05, "loss": 6.985, "step": 26330 }, { "epoch": 0.0691046658259773, "grad_norm": 0.07335763424634933, "learning_rate": 1.6667653664117332e-05, "loss": 6.9843, "step": 26340 }, { "epoch": 0.06935687263556116, "grad_norm": 0.06594116240739822, "learning_rate": 1.6645243642779316e-05, "loss": 6.982, "step": 26350 }, { "epoch": 0.06960907944514502, "grad_norm": 0.06756185740232468, "learning_rate": 1.662284291139661e-05, "loss": 6.9808, "step": 26360 }, { "epoch": 0.06986128625472887, "grad_norm": 0.07145228981971741, "learning_rate": 1.6600451485551832e-05, "loss": 6.9836, "step": 26370 }, { "epoch": 0.07011349306431273, "grad_norm": 0.060687992721796036, "learning_rate": 1.65780693808211e-05, "loss": 6.977, "step": 26380 }, { "epoch": 0.0703656998738966, "grad_norm": 0.05970882624387741, "learning_rate": 1.6555696612774046e-05, "loss": 6.9754, "step": 26390 }, { "epoch": 0.07061790668348046, "grad_norm": 0.058615393936634064, "learning_rate": 1.6533333196973822e-05, "loss": 6.9674, "step": 26400 }, { "epoch": 0.07087011349306431, "grad_norm": 0.05584177002310753, "learning_rate": 1.651097914897706e-05, "loss": 6.9503, "step": 26410 }, { "epoch": 0.07112232030264817, "grad_norm": 0.06289201229810715, "learning_rate": 1.6488634484333896e-05, "loss": 6.9672, "step": 26420 }, { "epoch": 0.07137452711223204, "grad_norm": 0.062416888773441315, "learning_rate": 1.646629921858792e-05, "loss": 6.9752, "step": 26430 }, { "epoch": 0.07162673392181589, "grad_norm": 0.06519443541765213, "learning_rate": 1.644397336727619e-05, "loss": 6.9803, "step": 26440 }, { "epoch": 0.07187894073139975, "grad_norm": 0.06678438931703568, "learning_rate": 1.64216569459292e-05, "loss": 6.9785, "step": 26450 }, { "epoch": 0.07213114754098361, "grad_norm": 0.06730299443006516, "learning_rate": 1.6399349970070925e-05, "loss": 6.9767, "step": 26460 }, { "epoch": 0.07238335435056746, "grad_norm": 0.06732653826475143, "learning_rate": 1.6377052455218723e-05, "loss": 6.9775, "step": 26470 }, { "epoch": 0.07263556116015132, "grad_norm": 0.06876540184020996, "learning_rate": 1.635476441688342e-05, "loss": 6.9802, "step": 26480 }, { "epoch": 0.07288776796973519, "grad_norm": 0.07175049930810928, "learning_rate": 1.633248587056918e-05, "loss": 6.9806, "step": 26490 }, { "epoch": 0.07313997477931904, "grad_norm": 0.07329666614532471, "learning_rate": 1.631021683177363e-05, "loss": 6.9848, "step": 26500 }, { "epoch": 0.0733921815889029, "grad_norm": 0.07672873139381409, "learning_rate": 1.628795731598775e-05, "loss": 6.9858, "step": 26510 }, { "epoch": 0.07364438839848676, "grad_norm": 0.06920111179351807, "learning_rate": 1.6265707338695917e-05, "loss": 6.9849, "step": 26520 }, { "epoch": 0.07389659520807061, "grad_norm": 0.05885031819343567, "learning_rate": 1.6243466915375853e-05, "loss": 6.9769, "step": 26530 }, { "epoch": 0.07414880201765447, "grad_norm": 0.06525301933288574, "learning_rate": 1.6221236061498636e-05, "loss": 6.9775, "step": 26540 }, { "epoch": 0.07440100882723834, "grad_norm": 0.05989408865571022, "learning_rate": 1.619901479252869e-05, "loss": 6.975, "step": 26550 }, { "epoch": 0.0746532156368222, "grad_norm": 0.05729896202683449, "learning_rate": 1.617680312392377e-05, "loss": 6.9533, "step": 26560 }, { "epoch": 0.07490542244640605, "grad_norm": 0.05730641633272171, "learning_rate": 1.6154601071134978e-05, "loss": 6.9654, "step": 26570 }, { "epoch": 0.07515762925598991, "grad_norm": 0.07078201323747635, "learning_rate": 1.613240864960668e-05, "loss": 6.9776, "step": 26580 }, { "epoch": 0.07540983606557378, "grad_norm": 0.06893301755189896, "learning_rate": 1.611022587477659e-05, "loss": 6.9825, "step": 26590 }, { "epoch": 0.07566204287515763, "grad_norm": 0.06960565596818924, "learning_rate": 1.6088052762075656e-05, "loss": 6.9831, "step": 26600 }, { "epoch": 0.07591424968474149, "grad_norm": 0.06577733159065247, "learning_rate": 1.6065889326928176e-05, "loss": 6.9825, "step": 26610 }, { "epoch": 0.07616645649432535, "grad_norm": 0.06734424084424973, "learning_rate": 1.604373558475164e-05, "loss": 6.9801, "step": 26620 }, { "epoch": 0.0764186633039092, "grad_norm": 0.06787553429603577, "learning_rate": 1.6021591550956868e-05, "loss": 6.9799, "step": 26630 }, { "epoch": 0.07667087011349306, "grad_norm": 0.05947215110063553, "learning_rate": 1.5999457240947874e-05, "loss": 6.9763, "step": 26640 }, { "epoch": 0.07692307692307693, "grad_norm": 0.05813184753060341, "learning_rate": 1.597733267012192e-05, "loss": 6.9657, "step": 26650 }, { "epoch": 0.07717528373266078, "grad_norm": 0.06031441688537598, "learning_rate": 1.5955217853869512e-05, "loss": 6.9672, "step": 26660 }, { "epoch": 0.07742749054224464, "grad_norm": 0.05795248970389366, "learning_rate": 1.5933112807574335e-05, "loss": 6.9728, "step": 26670 }, { "epoch": 0.0776796973518285, "grad_norm": 0.0660284236073494, "learning_rate": 1.591101754661333e-05, "loss": 6.976, "step": 26680 }, { "epoch": 0.07793190416141235, "grad_norm": 0.06548532843589783, "learning_rate": 1.5888932086356578e-05, "loss": 6.9786, "step": 26690 }, { "epoch": 0.07818411097099622, "grad_norm": 0.06751688569784164, "learning_rate": 1.586685644216737e-05, "loss": 6.9781, "step": 26700 }, { "epoch": 0.07843631778058008, "grad_norm": 0.07372871041297913, "learning_rate": 1.5844790629402154e-05, "loss": 6.9823, "step": 26710 }, { "epoch": 0.07868852459016394, "grad_norm": 0.07072052359580994, "learning_rate": 1.582273466341057e-05, "loss": 6.9849, "step": 26720 }, { "epoch": 0.07894073139974779, "grad_norm": 0.06988208740949631, "learning_rate": 1.580068855953537e-05, "loss": 6.986, "step": 26730 }, { "epoch": 0.07919293820933165, "grad_norm": 0.07226698845624924, "learning_rate": 1.5778652333112482e-05, "loss": 6.9846, "step": 26740 }, { "epoch": 0.07944514501891552, "grad_norm": 0.059055693447589874, "learning_rate": 1.575662599947091e-05, "loss": 6.978, "step": 26750 }, { "epoch": 0.07969735182849937, "grad_norm": 0.06187283247709274, "learning_rate": 1.573460957393284e-05, "loss": 6.9779, "step": 26760 }, { "epoch": 0.07994955863808323, "grad_norm": 0.04601234570145607, "learning_rate": 1.5712603071813524e-05, "loss": 6.9754, "step": 26770 }, { "epoch": 0.08020176544766709, "grad_norm": 0.05582762137055397, "learning_rate": 1.569060650842131e-05, "loss": 6.9695, "step": 26780 }, { "epoch": 0.08045397225725094, "grad_norm": 0.06278357654809952, "learning_rate": 1.566861989905767e-05, "loss": 6.974, "step": 26790 }, { "epoch": 0.0807061790668348, "grad_norm": 0.05789360776543617, "learning_rate": 1.5646643259017117e-05, "loss": 6.9739, "step": 26800 }, { "epoch": 0.08095838587641867, "grad_norm": 0.06773345917463303, "learning_rate": 1.5624676603587226e-05, "loss": 6.9732, "step": 26810 }, { "epoch": 0.08121059268600252, "grad_norm": 0.07064909487962723, "learning_rate": 1.5602719948048637e-05, "loss": 6.9808, "step": 26820 }, { "epoch": 0.08146279949558638, "grad_norm": 0.07242527604103088, "learning_rate": 1.5580773307675063e-05, "loss": 6.9817, "step": 26830 }, { "epoch": 0.08171500630517024, "grad_norm": 0.07280120253562927, "learning_rate": 1.55588366977332e-05, "loss": 6.9818, "step": 26840 }, { "epoch": 0.08196721311475409, "grad_norm": 0.064843088388443, "learning_rate": 1.55369101334828e-05, "loss": 6.9812, "step": 26850 }, { "epoch": 0.08221941992433796, "grad_norm": 0.056587278842926025, "learning_rate": 1.55149936301766e-05, "loss": 6.9738, "step": 26860 }, { "epoch": 0.08247162673392182, "grad_norm": 0.05924570932984352, "learning_rate": 1.549308720306037e-05, "loss": 6.9729, "step": 26870 }, { "epoch": 0.08272383354350568, "grad_norm": 0.05437285453081131, "learning_rate": 1.5471190867372845e-05, "loss": 6.972, "step": 26880 }, { "epoch": 0.08297604035308953, "grad_norm": 0.07193481177091599, "learning_rate": 1.5449304638345766e-05, "loss": 6.9795, "step": 26890 }, { "epoch": 0.0832282471626734, "grad_norm": 0.07170674204826355, "learning_rate": 1.542742853120383e-05, "loss": 6.9837, "step": 26900 }, { "epoch": 0.08348045397225726, "grad_norm": 0.0689728781580925, "learning_rate": 1.540556256116465e-05, "loss": 6.9814, "step": 26910 }, { "epoch": 0.0837326607818411, "grad_norm": 0.0661558210849762, "learning_rate": 1.5383706743438873e-05, "loss": 6.9805, "step": 26920 }, { "epoch": 0.08398486759142497, "grad_norm": 0.0625859722495079, "learning_rate": 1.5361861093230012e-05, "loss": 6.9816, "step": 26930 }, { "epoch": 0.08423707440100883, "grad_norm": 0.0616728737950325, "learning_rate": 1.5340025625734552e-05, "loss": 6.9786, "step": 26940 }, { "epoch": 0.08448928121059268, "grad_norm": 0.05736369639635086, "learning_rate": 1.531820035614187e-05, "loss": 6.9735, "step": 26950 }, { "epoch": 0.08474148802017654, "grad_norm": 0.06567522138357162, "learning_rate": 1.5296385299634256e-05, "loss": 6.9749, "step": 26960 }, { "epoch": 0.08499369482976041, "grad_norm": 0.07101672142744064, "learning_rate": 1.527458047138688e-05, "loss": 6.977, "step": 26970 }, { "epoch": 0.08524590163934426, "grad_norm": 0.06394845992326736, "learning_rate": 1.5252785886567842e-05, "loss": 6.9774, "step": 26980 }, { "epoch": 0.08549810844892812, "grad_norm": 0.05499110370874405, "learning_rate": 1.5231001560338054e-05, "loss": 6.9717, "step": 26990 }, { "epoch": 0.08575031525851198, "grad_norm": 0.05613207444548607, "learning_rate": 1.5209227507851362e-05, "loss": 6.9733, "step": 27000 }, { "epoch": 0.08600252206809583, "grad_norm": 0.06337690353393555, "learning_rate": 1.518746374425439e-05, "loss": 6.9788, "step": 27010 }, { "epoch": 0.0862547288776797, "grad_norm": 0.06395134329795837, "learning_rate": 1.5165710284686662e-05, "loss": 6.9771, "step": 27020 }, { "epoch": 0.08650693568726356, "grad_norm": 0.05856534466147423, "learning_rate": 1.514396714428052e-05, "loss": 6.9734, "step": 27030 }, { "epoch": 0.08675914249684742, "grad_norm": 0.06000562757253647, "learning_rate": 1.5122234338161096e-05, "loss": 6.9733, "step": 27040 }, { "epoch": 0.08701134930643127, "grad_norm": 0.07056010514497757, "learning_rate": 1.5100511881446385e-05, "loss": 6.9789, "step": 27050 }, { "epoch": 0.08726355611601513, "grad_norm": 0.07265526056289673, "learning_rate": 1.5078799789247154e-05, "loss": 6.9836, "step": 27060 }, { "epoch": 0.087515762925599, "grad_norm": 0.06384999305009842, "learning_rate": 1.5057098076666956e-05, "loss": 6.9768, "step": 27070 }, { "epoch": 0.08776796973518285, "grad_norm": 0.05531387776136398, "learning_rate": 1.5035406758802121e-05, "loss": 6.9734, "step": 27080 }, { "epoch": 0.08802017654476671, "grad_norm": 0.06701939553022385, "learning_rate": 1.5013725850741781e-05, "loss": 6.9767, "step": 27090 }, { "epoch": 0.08827238335435057, "grad_norm": 0.06665945798158646, "learning_rate": 1.4992055367567791e-05, "loss": 6.9798, "step": 27100 }, { "epoch": 0.08852459016393442, "grad_norm": 0.07092229276895523, "learning_rate": 1.4970395324354772e-05, "loss": 6.9818, "step": 27110 }, { "epoch": 0.08877679697351828, "grad_norm": 0.06601201742887497, "learning_rate": 1.494874573617006e-05, "loss": 6.9834, "step": 27120 }, { "epoch": 0.08902900378310215, "grad_norm": 0.05996740236878395, "learning_rate": 1.4927106618073758e-05, "loss": 6.9773, "step": 27130 }, { "epoch": 0.089281210592686, "grad_norm": 0.07086359709501266, "learning_rate": 1.4905477985118656e-05, "loss": 6.9841, "step": 27140 }, { "epoch": 0.08953341740226986, "grad_norm": 0.057227764278650284, "learning_rate": 1.4883859852350245e-05, "loss": 6.9769, "step": 27150 }, { "epoch": 0.08978562421185372, "grad_norm": 0.07136600464582443, "learning_rate": 1.4862252234806754e-05, "loss": 6.9789, "step": 27160 }, { "epoch": 0.09003783102143757, "grad_norm": 0.07027342915534973, "learning_rate": 1.484065514751903e-05, "loss": 6.9815, "step": 27170 }, { "epoch": 0.09029003783102144, "grad_norm": 0.06147473677992821, "learning_rate": 1.481906860551066e-05, "loss": 6.9804, "step": 27180 }, { "epoch": 0.0905422446406053, "grad_norm": 0.030294254422187805, "learning_rate": 1.4797492623797851e-05, "loss": 6.9677, "step": 27190 }, { "epoch": 0.09079445145018916, "grad_norm": 0.0460299588739872, "learning_rate": 1.4775927217389497e-05, "loss": 6.9542, "step": 27200 }, { "epoch": 0.09104665825977301, "grad_norm": 0.06140071898698807, "learning_rate": 1.4754372401287111e-05, "loss": 6.9694, "step": 27210 }, { "epoch": 0.09129886506935687, "grad_norm": 0.0728689506649971, "learning_rate": 1.4732828190484849e-05, "loss": 6.9841, "step": 27220 }, { "epoch": 0.09155107187894074, "grad_norm": 0.07221809029579163, "learning_rate": 1.4711294599969472e-05, "loss": 6.9828, "step": 27230 }, { "epoch": 0.09180327868852459, "grad_norm": 0.05323660746216774, "learning_rate": 1.4689771644720395e-05, "loss": 6.978, "step": 27240 }, { "epoch": 0.09205548549810845, "grad_norm": 0.047984953969717026, "learning_rate": 1.4668259339709591e-05, "loss": 6.9642, "step": 27250 }, { "epoch": 0.09230769230769231, "grad_norm": 0.05315428227186203, "learning_rate": 1.4646757699901667e-05, "loss": 6.9655, "step": 27260 }, { "epoch": 0.09255989911727616, "grad_norm": 0.055974218994379044, "learning_rate": 1.462526674025376e-05, "loss": 6.9674, "step": 27270 }, { "epoch": 0.09281210592686003, "grad_norm": 0.06345855444669724, "learning_rate": 1.46037864757156e-05, "loss": 6.9769, "step": 27280 }, { "epoch": 0.09306431273644389, "grad_norm": 0.058525748550891876, "learning_rate": 1.4582316921229506e-05, "loss": 6.9735, "step": 27290 }, { "epoch": 0.09331651954602774, "grad_norm": 0.05416250601410866, "learning_rate": 1.4560858091730297e-05, "loss": 6.9713, "step": 27300 }, { "epoch": 0.0935687263556116, "grad_norm": 0.05701015144586563, "learning_rate": 1.4539410002145383e-05, "loss": 6.9672, "step": 27310 }, { "epoch": 0.09382093316519546, "grad_norm": 0.06088685244321823, "learning_rate": 1.4517972667394658e-05, "loss": 6.9725, "step": 27320 }, { "epoch": 0.09407313997477931, "grad_norm": 0.06420265883207321, "learning_rate": 1.449654610239056e-05, "loss": 6.9744, "step": 27330 }, { "epoch": 0.09432534678436318, "grad_norm": 0.0709427073597908, "learning_rate": 1.4475130322038013e-05, "loss": 6.9744, "step": 27340 }, { "epoch": 0.09457755359394704, "grad_norm": 0.07511159032583237, "learning_rate": 1.4453725341234479e-05, "loss": 6.9845, "step": 27350 }, { "epoch": 0.0948297604035309, "grad_norm": 0.07337964326143265, "learning_rate": 1.4432331174869871e-05, "loss": 6.9844, "step": 27360 }, { "epoch": 0.09508196721311475, "grad_norm": 0.07421460002660751, "learning_rate": 1.4410947837826588e-05, "loss": 6.9854, "step": 27370 }, { "epoch": 0.09533417402269861, "grad_norm": 0.06925079226493835, "learning_rate": 1.43895753449795e-05, "loss": 6.9842, "step": 27380 }, { "epoch": 0.09558638083228248, "grad_norm": 0.07204020023345947, "learning_rate": 1.436821371119592e-05, "loss": 6.9857, "step": 27390 }, { "epoch": 0.09583858764186633, "grad_norm": 0.06979536265134811, "learning_rate": 1.4346862951335645e-05, "loss": 6.9842, "step": 27400 }, { "epoch": 0.09609079445145019, "grad_norm": 0.07026398926973343, "learning_rate": 1.432552308025086e-05, "loss": 6.9828, "step": 27410 }, { "epoch": 0.09634300126103405, "grad_norm": 0.0713929831981659, "learning_rate": 1.4304194112786221e-05, "loss": 6.9817, "step": 27420 }, { "epoch": 0.0965952080706179, "grad_norm": 0.06556617468595505, "learning_rate": 1.4282876063778744e-05, "loss": 6.9806, "step": 27430 }, { "epoch": 0.09684741488020177, "grad_norm": 0.049841560423374176, "learning_rate": 1.4261568948057908e-05, "loss": 6.9753, "step": 27440 }, { "epoch": 0.09709962168978563, "grad_norm": 0.05717701092362404, "learning_rate": 1.4240272780445539e-05, "loss": 6.9711, "step": 27450 }, { "epoch": 0.09735182849936948, "grad_norm": 0.06262341141700745, "learning_rate": 1.4218987575755886e-05, "loss": 6.9757, "step": 27460 }, { "epoch": 0.09760403530895334, "grad_norm": 0.06662409752607346, "learning_rate": 1.4197713348795552e-05, "loss": 6.9793, "step": 27470 }, { "epoch": 0.0978562421185372, "grad_norm": 0.07081175595521927, "learning_rate": 1.4176450114363496e-05, "loss": 6.9814, "step": 27480 }, { "epoch": 0.09810844892812105, "grad_norm": 0.07107632607221603, "learning_rate": 1.4155197887251035e-05, "loss": 6.9826, "step": 27490 }, { "epoch": 0.09836065573770492, "grad_norm": 0.0642974004149437, "learning_rate": 1.4133956682241852e-05, "loss": 6.9765, "step": 27500 }, { "epoch": 0.09861286254728878, "grad_norm": 0.0558718778192997, "learning_rate": 1.4112726514111935e-05, "loss": 6.9719, "step": 27510 }, { "epoch": 0.09886506935687264, "grad_norm": 0.031837478280067444, "learning_rate": 1.4091507397629607e-05, "loss": 6.9624, "step": 27520 }, { "epoch": 0.09911727616645649, "grad_norm": 0.043533746153116226, "learning_rate": 1.4070299347555493e-05, "loss": 6.955, "step": 27530 }, { "epoch": 0.09936948297604035, "grad_norm": 0.05801338702440262, "learning_rate": 1.404910237864252e-05, "loss": 6.9698, "step": 27540 }, { "epoch": 0.09962168978562422, "grad_norm": 0.060828570276498795, "learning_rate": 1.4027916505635935e-05, "loss": 6.98, "step": 27550 }, { "epoch": 0.09987389659520807, "grad_norm": 0.061496201902627945, "learning_rate": 1.4006741743273222e-05, "loss": 6.9751, "step": 27560 }, { "epoch": 0.10012610340479193, "grad_norm": 0.06916064769029617, "learning_rate": 1.3985578106284185e-05, "loss": 6.9778, "step": 27570 }, { "epoch": 0.1003783102143758, "grad_norm": 0.06309553980827332, "learning_rate": 1.3964425609390845e-05, "loss": 6.9761, "step": 27580 }, { "epoch": 0.10063051702395964, "grad_norm": 0.061032600700855255, "learning_rate": 1.39432842673075e-05, "loss": 6.9755, "step": 27590 }, { "epoch": 0.1008827238335435, "grad_norm": 0.06694125384092331, "learning_rate": 1.392215409474066e-05, "loss": 6.977, "step": 27600 }, { "epoch": 0.10113493064312737, "grad_norm": 0.05968431010842323, "learning_rate": 1.3901035106389114e-05, "loss": 6.9755, "step": 27610 }, { "epoch": 0.10138713745271122, "grad_norm": 0.06465157121419907, "learning_rate": 1.3879927316943826e-05, "loss": 6.9748, "step": 27620 }, { "epoch": 0.10163934426229508, "grad_norm": 0.07226986438035965, "learning_rate": 1.385883074108799e-05, "loss": 6.9819, "step": 27630 }, { "epoch": 0.10189155107187894, "grad_norm": 0.07396721839904785, "learning_rate": 1.3837745393496992e-05, "loss": 6.9838, "step": 27640 }, { "epoch": 0.1021437578814628, "grad_norm": 0.06905898451805115, "learning_rate": 1.3816671288838401e-05, "loss": 6.9842, "step": 27650 }, { "epoch": 0.10239596469104666, "grad_norm": 0.07130999118089676, "learning_rate": 1.3795608441771994e-05, "loss": 6.9832, "step": 27660 }, { "epoch": 0.10264817150063052, "grad_norm": 0.0696120485663414, "learning_rate": 1.3774556866949682e-05, "loss": 6.9819, "step": 27670 }, { "epoch": 0.10290037831021438, "grad_norm": 0.05474436283111572, "learning_rate": 1.3753516579015575e-05, "loss": 6.9746, "step": 27680 }, { "epoch": 0.10315258511979823, "grad_norm": 0.05504733696579933, "learning_rate": 1.3732487592605868e-05, "loss": 6.9675, "step": 27690 }, { "epoch": 0.1034047919293821, "grad_norm": 0.056689683347940445, "learning_rate": 1.3711469922348966e-05, "loss": 6.9701, "step": 27700 }, { "epoch": 0.10365699873896596, "grad_norm": 0.0572512187063694, "learning_rate": 1.3690463582865344e-05, "loss": 6.9715, "step": 27710 }, { "epoch": 0.10390920554854981, "grad_norm": 0.07231101393699646, "learning_rate": 1.3669468588767642e-05, "loss": 6.9725, "step": 27720 }, { "epoch": 0.10416141235813367, "grad_norm": 0.06914666295051575, "learning_rate": 1.3648484954660578e-05, "loss": 6.9817, "step": 27730 }, { "epoch": 0.10441361916771753, "grad_norm": 0.06252267956733704, "learning_rate": 1.3627512695140977e-05, "loss": 6.978, "step": 27740 }, { "epoch": 0.10466582597730138, "grad_norm": 0.06790090352296829, "learning_rate": 1.3606551824797747e-05, "loss": 6.9793, "step": 27750 }, { "epoch": 0.10491803278688525, "grad_norm": 0.06660401821136475, "learning_rate": 1.3585602358211863e-05, "loss": 6.9798, "step": 27760 }, { "epoch": 0.10517023959646911, "grad_norm": 0.06833840161561966, "learning_rate": 1.3564664309956409e-05, "loss": 6.9781, "step": 27770 }, { "epoch": 0.10542244640605296, "grad_norm": 0.06006806343793869, "learning_rate": 1.3543737694596483e-05, "loss": 6.9809, "step": 27780 }, { "epoch": 0.10567465321563682, "grad_norm": 0.06960756331682205, "learning_rate": 1.3522822526689243e-05, "loss": 6.9761, "step": 27790 }, { "epoch": 0.10592686002522068, "grad_norm": 0.07164149731397629, "learning_rate": 1.350191882078388e-05, "loss": 6.9829, "step": 27800 }, { "epoch": 0.10617906683480453, "grad_norm": 0.06960498541593552, "learning_rate": 1.3481026591421632e-05, "loss": 6.9823, "step": 27810 }, { "epoch": 0.1064312736443884, "grad_norm": 0.07096568495035172, "learning_rate": 1.3460145853135726e-05, "loss": 6.9809, "step": 27820 }, { "epoch": 0.10668348045397226, "grad_norm": 0.07047203183174133, "learning_rate": 1.3439276620451423e-05, "loss": 6.9803, "step": 27830 }, { "epoch": 0.10693568726355612, "grad_norm": 0.056791871786117554, "learning_rate": 1.3418418907885957e-05, "loss": 6.973, "step": 27840 }, { "epoch": 0.10718789407313997, "grad_norm": 0.05824240669608116, "learning_rate": 1.3397572729948557e-05, "loss": 6.9711, "step": 27850 }, { "epoch": 0.10744010088272384, "grad_norm": 0.059778936207294464, "learning_rate": 1.337673810114042e-05, "loss": 6.9732, "step": 27860 }, { "epoch": 0.1076923076923077, "grad_norm": 0.06752664595842361, "learning_rate": 1.3355915035954732e-05, "loss": 6.98, "step": 27870 }, { "epoch": 0.10794451450189155, "grad_norm": 0.05872386321425438, "learning_rate": 1.3335103548876617e-05, "loss": 6.9779, "step": 27880 }, { "epoch": 0.10819672131147541, "grad_norm": 0.05994803085923195, "learning_rate": 1.3314303654383144e-05, "loss": 6.9723, "step": 27890 }, { "epoch": 0.10844892812105927, "grad_norm": 0.05379655584692955, "learning_rate": 1.3293515366943323e-05, "loss": 6.9744, "step": 27900 }, { "epoch": 0.10870113493064312, "grad_norm": 0.04237983003258705, "learning_rate": 1.3272738701018075e-05, "loss": 6.9738, "step": 27910 }, { "epoch": 0.10895334174022699, "grad_norm": 0.04764416813850403, "learning_rate": 1.3251973671060281e-05, "loss": 6.9554, "step": 27920 }, { "epoch": 0.10920554854981085, "grad_norm": 0.05592659115791321, "learning_rate": 1.3231220291514665e-05, "loss": 6.9662, "step": 27930 }, { "epoch": 0.1094577553593947, "grad_norm": 0.0575064979493618, "learning_rate": 1.3210478576817919e-05, "loss": 6.9716, "step": 27940 }, { "epoch": 0.10970996216897856, "grad_norm": 0.05766468867659569, "learning_rate": 1.3189748541398531e-05, "loss": 6.9759, "step": 27950 }, { "epoch": 0.10996216897856242, "grad_norm": 0.061609651893377304, "learning_rate": 1.3169030199676954e-05, "loss": 6.9758, "step": 27960 }, { "epoch": 0.11021437578814627, "grad_norm": 0.062367312610149384, "learning_rate": 1.314832356606544e-05, "loss": 6.9792, "step": 27970 }, { "epoch": 0.11046658259773014, "grad_norm": 0.061487652361392975, "learning_rate": 1.3127628654968143e-05, "loss": 6.9744, "step": 27980 }, { "epoch": 0.110718789407314, "grad_norm": 0.061859484761953354, "learning_rate": 1.3106945480781037e-05, "loss": 6.9737, "step": 27990 }, { "epoch": 0.11097099621689786, "grad_norm": 0.06243722885847092, "learning_rate": 1.3086274057891931e-05, "loss": 6.9737, "step": 28000 }, { "epoch": 0.11122320302648171, "grad_norm": 0.07044077664613724, "learning_rate": 1.3067679836536472e-05, "loss": 6.9762, "step": 28010 }, { "epoch": 0.11147540983606558, "grad_norm": 0.06926742196083069, "learning_rate": 1.304703078072272e-05, "loss": 6.9854, "step": 28020 }, { "epoch": 0.11172761664564944, "grad_norm": 0.06294234842061996, "learning_rate": 1.3026393517885358e-05, "loss": 6.9838, "step": 28030 }, { "epoch": 0.11197982345523329, "grad_norm": 0.05504387989640236, "learning_rate": 1.3005768062380293e-05, "loss": 6.9763, "step": 28040 }, { "epoch": 0.11223203026481715, "grad_norm": 0.06389061361551285, "learning_rate": 1.2985154428555173e-05, "loss": 6.9748, "step": 28050 }, { "epoch": 0.11248423707440101, "grad_norm": 0.04577207192778587, "learning_rate": 1.2964552630749437e-05, "loss": 6.9692, "step": 28060 }, { "epoch": 0.11273644388398486, "grad_norm": 0.06704005599021912, "learning_rate": 1.2943962683294285e-05, "loss": 6.9752, "step": 28070 }, { "epoch": 0.11298865069356873, "grad_norm": 0.05370500683784485, "learning_rate": 1.2923384600512704e-05, "loss": 6.9764, "step": 28080 }, { "epoch": 0.11324085750315259, "grad_norm": 0.06084606423974037, "learning_rate": 1.2902818396719377e-05, "loss": 6.9738, "step": 28090 }, { "epoch": 0.11349306431273644, "grad_norm": 0.07059165835380554, "learning_rate": 1.2882264086220778e-05, "loss": 6.9802, "step": 28100 }, { "epoch": 0.1137452711223203, "grad_norm": 0.06750708818435669, "learning_rate": 1.286172168331507e-05, "loss": 6.9796, "step": 28110 }, { "epoch": 0.11399747793190416, "grad_norm": 0.06810009479522705, "learning_rate": 1.2841191202292137e-05, "loss": 6.9798, "step": 28120 }, { "epoch": 0.11424968474148801, "grad_norm": 0.06067595258355141, "learning_rate": 1.2820672657433574e-05, "loss": 6.9761, "step": 28130 }, { "epoch": 0.11450189155107188, "grad_norm": 0.06703101843595505, "learning_rate": 1.280016606301269e-05, "loss": 6.9756, "step": 28140 }, { "epoch": 0.11475409836065574, "grad_norm": 0.06988810747861862, "learning_rate": 1.2779671433294459e-05, "loss": 6.9784, "step": 28150 }, { "epoch": 0.1150063051702396, "grad_norm": 0.06317925453186035, "learning_rate": 1.2759188782535534e-05, "loss": 6.9779, "step": 28160 }, { "epoch": 0.11525851197982345, "grad_norm": 0.058078981935977936, "learning_rate": 1.2738718124984248e-05, "loss": 6.9728, "step": 28170 }, { "epoch": 0.11551071878940732, "grad_norm": 0.05248633399605751, "learning_rate": 1.2718259474880566e-05, "loss": 6.9704, "step": 28180 }, { "epoch": 0.11576292559899118, "grad_norm": 0.05995425581932068, "learning_rate": 1.2697812846456142e-05, "loss": 6.9744, "step": 28190 }, { "epoch": 0.11601513240857503, "grad_norm": 0.06503122299909592, "learning_rate": 1.2677378253934221e-05, "loss": 6.979, "step": 28200 }, { "epoch": 0.11626733921815889, "grad_norm": 0.07239556312561035, "learning_rate": 1.265695571152973e-05, "loss": 6.9803, "step": 28210 }, { "epoch": 0.11651954602774275, "grad_norm": 0.07235059887170792, "learning_rate": 1.2636545233449138e-05, "loss": 6.9832, "step": 28220 }, { "epoch": 0.1167717528373266, "grad_norm": 0.07498928904533386, "learning_rate": 1.2616146833890593e-05, "loss": 6.984, "step": 28230 }, { "epoch": 0.11702395964691047, "grad_norm": 0.06152504310011864, "learning_rate": 1.2595760527043798e-05, "loss": 6.9828, "step": 28240 }, { "epoch": 0.11727616645649433, "grad_norm": 0.06393901258707047, "learning_rate": 1.2575386327090075e-05, "loss": 6.9776, "step": 28250 }, { "epoch": 0.11752837326607818, "grad_norm": 0.05921265110373497, "learning_rate": 1.2555024248202299e-05, "loss": 6.9755, "step": 28260 }, { "epoch": 0.11778058007566204, "grad_norm": 0.06566720455884933, "learning_rate": 1.253467430454492e-05, "loss": 6.9799, "step": 28270 }, { "epoch": 0.1180327868852459, "grad_norm": 0.06483804434537888, "learning_rate": 1.251433651027395e-05, "loss": 6.9799, "step": 28280 }, { "epoch": 0.11828499369482975, "grad_norm": 0.06824260205030441, "learning_rate": 1.2494010879536937e-05, "loss": 6.9809, "step": 28290 }, { "epoch": 0.11853720050441362, "grad_norm": 0.061137862503528595, "learning_rate": 1.2473697426472999e-05, "loss": 6.9798, "step": 28300 }, { "epoch": 0.11878940731399748, "grad_norm": 0.06728857010602951, "learning_rate": 1.2453396165212752e-05, "loss": 6.9815, "step": 28310 }, { "epoch": 0.11904161412358134, "grad_norm": 0.06827369332313538, "learning_rate": 1.2433107109878339e-05, "loss": 6.9807, "step": 28320 }, { "epoch": 0.11929382093316519, "grad_norm": 0.058284103870391846, "learning_rate": 1.241283027458341e-05, "loss": 6.9764, "step": 28330 }, { "epoch": 0.11954602774274906, "grad_norm": 0.06793686747550964, "learning_rate": 1.2392565673433136e-05, "loss": 6.9784, "step": 28340 }, { "epoch": 0.11979823455233292, "grad_norm": 0.04896509647369385, "learning_rate": 1.2372313320524136e-05, "loss": 6.9729, "step": 28350 }, { "epoch": 0.12005044136191677, "grad_norm": 0.05259054899215698, "learning_rate": 1.235207322994456e-05, "loss": 6.9655, "step": 28360 }, { "epoch": 0.12030264817150063, "grad_norm": 0.04662218317389488, "learning_rate": 1.2331845415773989e-05, "loss": 6.9652, "step": 28370 }, { "epoch": 0.1205548549810845, "grad_norm": 0.04938393086194992, "learning_rate": 1.2311629892083476e-05, "loss": 6.9652, "step": 28380 }, { "epoch": 0.12080706179066834, "grad_norm": 0.06339941173791885, "learning_rate": 1.2291426672935522e-05, "loss": 6.9726, "step": 28390 }, { "epoch": 0.1210592686002522, "grad_norm": 0.06323794275522232, "learning_rate": 1.2271235772384067e-05, "loss": 6.9761, "step": 28400 }, { "epoch": 0.12131147540983607, "grad_norm": 0.06435132771730423, "learning_rate": 1.2251057204474503e-05, "loss": 6.9763, "step": 28410 }, { "epoch": 0.12156368221941992, "grad_norm": 0.062218960374593735, "learning_rate": 1.2230890983243617e-05, "loss": 6.9763, "step": 28420 }, { "epoch": 0.12181588902900378, "grad_norm": 0.059713516384363174, "learning_rate": 1.2210737122719613e-05, "loss": 6.9764, "step": 28430 }, { "epoch": 0.12206809583858765, "grad_norm": 0.057097651064395905, "learning_rate": 1.2190595636922097e-05, "loss": 6.9703, "step": 28440 }, { "epoch": 0.1223203026481715, "grad_norm": 0.05537387356162071, "learning_rate": 1.2170466539862085e-05, "loss": 6.9686, "step": 28450 }, { "epoch": 0.12257250945775536, "grad_norm": 0.07063174247741699, "learning_rate": 1.2150349845541944e-05, "loss": 6.9755, "step": 28460 }, { "epoch": 0.12282471626733922, "grad_norm": 0.07447841763496399, "learning_rate": 1.2130245567955453e-05, "loss": 6.9828, "step": 28470 }, { "epoch": 0.12307692307692308, "grad_norm": 0.07003980129957199, "learning_rate": 1.2110153721087694e-05, "loss": 6.9818, "step": 28480 }, { "epoch": 0.12332912988650693, "grad_norm": 0.07318668812513351, "learning_rate": 1.2090074318915166e-05, "loss": 6.9822, "step": 28490 }, { "epoch": 0.1235813366960908, "grad_norm": 0.06464444845914841, "learning_rate": 1.2070007375405677e-05, "loss": 6.9834, "step": 28500 }, { "epoch": 0.12383354350567466, "grad_norm": 0.06530027836561203, "learning_rate": 1.2049952904518355e-05, "loss": 6.9777, "step": 28510 }, { "epoch": 0.12408575031525851, "grad_norm": 0.058405227959156036, "learning_rate": 1.2029910920203697e-05, "loss": 6.9767, "step": 28520 }, { "epoch": 0.12433795712484237, "grad_norm": 0.06572219729423523, "learning_rate": 1.2009881436403483e-05, "loss": 6.9781, "step": 28530 }, { "epoch": 0.12459016393442623, "grad_norm": 0.03093935362994671, "learning_rate": 1.1989864467050792e-05, "loss": 6.9711, "step": 28540 }, { "epoch": 0.12484237074401008, "grad_norm": 0.047403689473867416, "learning_rate": 1.1969860026070001e-05, "loss": 6.9535, "step": 28550 }, { "epoch": 0.12509457755359396, "grad_norm": 0.05308094248175621, "learning_rate": 1.1949868127376802e-05, "loss": 6.9663, "step": 28560 }, { "epoch": 0.1253467843631778, "grad_norm": 0.053915198892354965, "learning_rate": 1.1929888784878127e-05, "loss": 6.9736, "step": 28570 }, { "epoch": 0.12559899117276166, "grad_norm": 0.050866059958934784, "learning_rate": 1.1909922012472182e-05, "loss": 6.9678, "step": 28580 }, { "epoch": 0.12585119798234554, "grad_norm": 0.04685047268867493, "learning_rate": 1.1889967824048423e-05, "loss": 6.9661, "step": 28590 }, { "epoch": 0.12610340479192939, "grad_norm": 0.05179118737578392, "learning_rate": 1.1870026233487587e-05, "loss": 6.9671, "step": 28600 }, { "epoch": 0.12635561160151323, "grad_norm": 0.05615481361746788, "learning_rate": 1.1850097254661594e-05, "loss": 6.9707, "step": 28610 }, { "epoch": 0.1266078184110971, "grad_norm": 0.06628232449293137, "learning_rate": 1.1830180901433646e-05, "loss": 6.9792, "step": 28620 }, { "epoch": 0.12686002522068096, "grad_norm": 0.061452291905879974, "learning_rate": 1.1810277187658135e-05, "loss": 6.9796, "step": 28630 }, { "epoch": 0.1271122320302648, "grad_norm": 0.03938031569123268, "learning_rate": 1.1790386127180623e-05, "loss": 6.9603, "step": 28640 }, { "epoch": 0.1273644388398487, "grad_norm": 0.06081527844071388, "learning_rate": 1.1770507733837944e-05, "loss": 6.9666, "step": 28650 }, { "epoch": 0.12761664564943254, "grad_norm": 0.07033318281173706, "learning_rate": 1.1750642021458062e-05, "loss": 6.9757, "step": 28660 }, { "epoch": 0.12786885245901639, "grad_norm": 0.06960902363061905, "learning_rate": 1.1730789003860164e-05, "loss": 6.9834, "step": 28670 }, { "epoch": 0.12812105926860026, "grad_norm": 0.06050384044647217, "learning_rate": 1.171094869485457e-05, "loss": 6.9736, "step": 28680 }, { "epoch": 0.1283732660781841, "grad_norm": 0.054873593151569366, "learning_rate": 1.1691121108242779e-05, "loss": 6.97, "step": 28690 }, { "epoch": 0.12862547288776796, "grad_norm": 0.05669308453798294, "learning_rate": 1.1671306257817423e-05, "loss": 6.9689, "step": 28700 }, { "epoch": 0.12887767969735184, "grad_norm": 0.05636686831712723, "learning_rate": 1.1651504157362304e-05, "loss": 6.971, "step": 28710 }, { "epoch": 0.1291298865069357, "grad_norm": 0.05579865723848343, "learning_rate": 1.1631714820652315e-05, "loss": 6.9739, "step": 28720 }, { "epoch": 0.12938209331651954, "grad_norm": 0.07203740626573563, "learning_rate": 1.1611938261453525e-05, "loss": 6.9794, "step": 28730 }, { "epoch": 0.1296343001261034, "grad_norm": 0.06531516462564468, "learning_rate": 1.1592174493523043e-05, "loss": 6.9788, "step": 28740 }, { "epoch": 0.12988650693568726, "grad_norm": 0.06738928705453873, "learning_rate": 1.1572423530609142e-05, "loss": 6.9796, "step": 28750 }, { "epoch": 0.1301387137452711, "grad_norm": 0.06802210956811905, "learning_rate": 1.155268538645116e-05, "loss": 6.9808, "step": 28760 }, { "epoch": 0.130390920554855, "grad_norm": 0.055487360805273056, "learning_rate": 1.153296007477951e-05, "loss": 6.9745, "step": 28770 }, { "epoch": 0.13064312736443884, "grad_norm": 0.05394229292869568, "learning_rate": 1.1513247609315709e-05, "loss": 6.9729, "step": 28780 }, { "epoch": 0.1308953341740227, "grad_norm": 0.06298384070396423, "learning_rate": 1.1493548003772311e-05, "loss": 6.9743, "step": 28790 }, { "epoch": 0.13114754098360656, "grad_norm": 0.07001691311597824, "learning_rate": 1.147386127185293e-05, "loss": 6.9801, "step": 28800 }, { "epoch": 0.1313997477931904, "grad_norm": 0.06871996074914932, "learning_rate": 1.1454187427252213e-05, "loss": 6.9805, "step": 28810 }, { "epoch": 0.13165195460277426, "grad_norm": 0.06605315953493118, "learning_rate": 1.1434526483655886e-05, "loss": 6.9798, "step": 28820 }, { "epoch": 0.13190416141235814, "grad_norm": 0.06493818759918213, "learning_rate": 1.1414878454740656e-05, "loss": 6.9795, "step": 28830 }, { "epoch": 0.132156368221942, "grad_norm": 0.06943175941705704, "learning_rate": 1.1395243354174257e-05, "loss": 6.9812, "step": 28840 }, { "epoch": 0.13240857503152584, "grad_norm": 0.06983596086502075, "learning_rate": 1.137562119561543e-05, "loss": 6.9816, "step": 28850 }, { "epoch": 0.13266078184110972, "grad_norm": 0.06427829712629318, "learning_rate": 1.135601199271393e-05, "loss": 6.9771, "step": 28860 }, { "epoch": 0.13291298865069356, "grad_norm": 0.0596126951277256, "learning_rate": 1.1336415759110483e-05, "loss": 6.9743, "step": 28870 }, { "epoch": 0.13316519546027744, "grad_norm": 0.06219634786248207, "learning_rate": 1.1316832508436787e-05, "loss": 6.9755, "step": 28880 }, { "epoch": 0.1334174022698613, "grad_norm": 0.06540021300315857, "learning_rate": 1.1297262254315543e-05, "loss": 6.9761, "step": 28890 }, { "epoch": 0.13366960907944514, "grad_norm": 0.06467092782258987, "learning_rate": 1.1277705010360349e-05, "loss": 6.9731, "step": 28900 }, { "epoch": 0.13392181588902902, "grad_norm": 0.057344406843185425, "learning_rate": 1.125816079017582e-05, "loss": 6.9714, "step": 28910 }, { "epoch": 0.13417402269861287, "grad_norm": 0.06487845629453659, "learning_rate": 1.1238629607357464e-05, "loss": 6.9754, "step": 28920 }, { "epoch": 0.13442622950819672, "grad_norm": 0.06184601038694382, "learning_rate": 1.1219111475491754e-05, "loss": 6.9804, "step": 28930 }, { "epoch": 0.1346784363177806, "grad_norm": 0.06188199669122696, "learning_rate": 1.119960640815606e-05, "loss": 6.9735, "step": 28940 }, { "epoch": 0.13493064312736444, "grad_norm": 0.06342869251966476, "learning_rate": 1.118011441891867e-05, "loss": 6.9738, "step": 28950 }, { "epoch": 0.1351828499369483, "grad_norm": 0.06358727067708969, "learning_rate": 1.1160635521338767e-05, "loss": 6.9783, "step": 28960 }, { "epoch": 0.13543505674653217, "grad_norm": 0.0603787936270237, "learning_rate": 1.114116972896646e-05, "loss": 6.973, "step": 28970 }, { "epoch": 0.13568726355611602, "grad_norm": 0.05685180425643921, "learning_rate": 1.1121717055342694e-05, "loss": 6.9736, "step": 28980 }, { "epoch": 0.13593947036569987, "grad_norm": 0.04830327257514, "learning_rate": 1.1102277513999341e-05, "loss": 6.9661, "step": 28990 }, { "epoch": 0.13619167717528374, "grad_norm": 0.07001760601997375, "learning_rate": 1.1082851118459084e-05, "loss": 6.9723, "step": 29000 }, { "epoch": 0.1364438839848676, "grad_norm": 0.059200435876846313, "learning_rate": 1.1063437882235478e-05, "loss": 6.979, "step": 29010 }, { "epoch": 0.13669609079445144, "grad_norm": 0.06499964743852615, "learning_rate": 1.1044037818832961e-05, "loss": 6.9769, "step": 29020 }, { "epoch": 0.13694829760403532, "grad_norm": 0.0659562274813652, "learning_rate": 1.1024650941746756e-05, "loss": 6.98, "step": 29030 }, { "epoch": 0.13720050441361917, "grad_norm": 0.06870611757040024, "learning_rate": 1.1005277264462956e-05, "loss": 6.9769, "step": 29040 }, { "epoch": 0.13745271122320302, "grad_norm": 0.07262314110994339, "learning_rate": 1.098591680045844e-05, "loss": 6.981, "step": 29050 }, { "epoch": 0.1377049180327869, "grad_norm": 0.0711677148938179, "learning_rate": 1.096656956320091e-05, "loss": 6.9832, "step": 29060 }, { "epoch": 0.13795712484237074, "grad_norm": 0.07279966026544571, "learning_rate": 1.0947235566148858e-05, "loss": 6.9847, "step": 29070 }, { "epoch": 0.1382093316519546, "grad_norm": 0.062228355556726456, "learning_rate": 1.0927914822751585e-05, "loss": 6.9794, "step": 29080 }, { "epoch": 0.13846153846153847, "grad_norm": 0.06561195105314255, "learning_rate": 1.090860734644915e-05, "loss": 6.9786, "step": 29090 }, { "epoch": 0.13871374527112232, "grad_norm": 0.0732274129986763, "learning_rate": 1.0889313150672399e-05, "loss": 6.9804, "step": 29100 }, { "epoch": 0.13896595208070617, "grad_norm": 0.06516612321138382, "learning_rate": 1.0870032248842922e-05, "loss": 6.9806, "step": 29110 }, { "epoch": 0.13921815889029004, "grad_norm": 0.05687468498945236, "learning_rate": 1.085076465437309e-05, "loss": 6.9775, "step": 29120 }, { "epoch": 0.1394703656998739, "grad_norm": 0.06619321554899216, "learning_rate": 1.083151038066599e-05, "loss": 6.9786, "step": 29130 }, { "epoch": 0.13972257250945774, "grad_norm": 0.06987091153860092, "learning_rate": 1.0812269441115441e-05, "loss": 6.9818, "step": 29140 }, { "epoch": 0.13997477931904162, "grad_norm": 0.06180534139275551, "learning_rate": 1.0793041849106035e-05, "loss": 6.9755, "step": 29150 }, { "epoch": 0.14022698612862547, "grad_norm": 0.0614415742456913, "learning_rate": 1.0773827618012991e-05, "loss": 6.9761, "step": 29160 }, { "epoch": 0.14047919293820932, "grad_norm": 0.06353729218244553, "learning_rate": 1.0754626761202326e-05, "loss": 6.9779, "step": 29170 }, { "epoch": 0.1407313997477932, "grad_norm": 0.05901533365249634, "learning_rate": 1.0735439292030682e-05, "loss": 6.9715, "step": 29180 }, { "epoch": 0.14098360655737704, "grad_norm": 0.061766140162944794, "learning_rate": 1.0716265223845448e-05, "loss": 6.9753, "step": 29190 }, { "epoch": 0.14123581336696092, "grad_norm": 0.05916517600417137, "learning_rate": 1.0697104569984642e-05, "loss": 6.972, "step": 29200 }, { "epoch": 0.14148802017654477, "grad_norm": 0.0553058460354805, "learning_rate": 1.0677957343776976e-05, "loss": 6.971, "step": 29210 }, { "epoch": 0.14174022698612862, "grad_norm": 0.04962935671210289, "learning_rate": 1.06588235585418e-05, "loss": 6.9676, "step": 29220 }, { "epoch": 0.1419924337957125, "grad_norm": 0.054741259664297104, "learning_rate": 1.0639703227589158e-05, "loss": 6.967, "step": 29230 }, { "epoch": 0.14224464060529635, "grad_norm": 0.0529380701482296, "learning_rate": 1.0620596364219687e-05, "loss": 6.9684, "step": 29240 }, { "epoch": 0.1424968474148802, "grad_norm": 0.06928584724664688, "learning_rate": 1.060150298172468e-05, "loss": 6.9737, "step": 29250 }, { "epoch": 0.14274905422446407, "grad_norm": 0.06492013484239578, "learning_rate": 1.058242309338604e-05, "loss": 6.981, "step": 29260 }, { "epoch": 0.14300126103404792, "grad_norm": 0.0540274903178215, "learning_rate": 1.0563356712476286e-05, "loss": 6.9735, "step": 29270 }, { "epoch": 0.14325346784363177, "grad_norm": 0.062339358031749725, "learning_rate": 1.0544303852258566e-05, "loss": 6.97, "step": 29280 }, { "epoch": 0.14350567465321565, "grad_norm": 0.06169098988175392, "learning_rate": 1.0525264525986575e-05, "loss": 6.9721, "step": 29290 }, { "epoch": 0.1437578814627995, "grad_norm": 0.06202748417854309, "learning_rate": 1.0506238746904649e-05, "loss": 6.9733, "step": 29300 }, { "epoch": 0.14401008827238335, "grad_norm": 0.063414566218853, "learning_rate": 1.048722652824765e-05, "loss": 6.9784, "step": 29310 }, { "epoch": 0.14426229508196722, "grad_norm": 0.060886505991220474, "learning_rate": 1.0468227883241037e-05, "loss": 6.9771, "step": 29320 }, { "epoch": 0.14451450189155107, "grad_norm": 0.06429887562990189, "learning_rate": 1.0449242825100806e-05, "loss": 6.9795, "step": 29330 }, { "epoch": 0.14476670870113492, "grad_norm": 0.06599288433790207, "learning_rate": 1.0430271367033528e-05, "loss": 6.9808, "step": 29340 }, { "epoch": 0.1450189155107188, "grad_norm": 0.04259196296334267, "learning_rate": 1.0411313522236292e-05, "loss": 6.9754, "step": 29350 }, { "epoch": 0.14527112232030265, "grad_norm": 0.029182448983192444, "learning_rate": 1.0392369303896725e-05, "loss": 6.9467, "step": 29360 }, { "epoch": 0.1455233291298865, "grad_norm": 0.043149616569280624, "learning_rate": 1.0373438725192976e-05, "loss": 6.9541, "step": 29370 }, { "epoch": 0.14577553593947037, "grad_norm": 0.06809855252504349, "learning_rate": 1.0354521799293685e-05, "loss": 6.9718, "step": 29380 }, { "epoch": 0.14602774274905422, "grad_norm": 0.06913764029741287, "learning_rate": 1.0335618539358047e-05, "loss": 6.9783, "step": 29390 }, { "epoch": 0.14627994955863807, "grad_norm": 0.06657219678163528, "learning_rate": 1.0316728958535684e-05, "loss": 6.9786, "step": 29400 }, { "epoch": 0.14653215636822195, "grad_norm": 0.05927315726876259, "learning_rate": 1.0297853069966775e-05, "loss": 6.9777, "step": 29410 }, { "epoch": 0.1467843631778058, "grad_norm": 0.060675617307424545, "learning_rate": 1.0278990886781886e-05, "loss": 6.9776, "step": 29420 }, { "epoch": 0.14703656998738965, "grad_norm": 0.06892792135477066, "learning_rate": 1.0260142422102137e-05, "loss": 6.9808, "step": 29430 }, { "epoch": 0.14728877679697353, "grad_norm": 0.0718960165977478, "learning_rate": 1.0241307689039043e-05, "loss": 6.9828, "step": 29440 }, { "epoch": 0.14754098360655737, "grad_norm": 0.05898508056998253, "learning_rate": 1.0222486700694606e-05, "loss": 6.9804, "step": 29450 }, { "epoch": 0.14779319041614122, "grad_norm": 0.056999322026968, "learning_rate": 1.0203679470161244e-05, "loss": 6.9736, "step": 29460 }, { "epoch": 0.1480453972257251, "grad_norm": 0.05172068625688553, "learning_rate": 1.018488601052181e-05, "loss": 6.97, "step": 29470 }, { "epoch": 0.14829760403530895, "grad_norm": 0.07261275500059128, "learning_rate": 1.0166106334849572e-05, "loss": 6.9759, "step": 29480 }, { "epoch": 0.1485498108448928, "grad_norm": 0.07282055169343948, "learning_rate": 1.014734045620823e-05, "loss": 6.9839, "step": 29490 }, { "epoch": 0.14880201765447668, "grad_norm": 0.06815483421087265, "learning_rate": 1.0128588387651871e-05, "loss": 6.9827, "step": 29500 }, { "epoch": 0.14905422446406053, "grad_norm": 0.0522121824324131, "learning_rate": 1.010985014222497e-05, "loss": 6.9764, "step": 29510 }, { "epoch": 0.1493064312736444, "grad_norm": 0.05278226360678673, "learning_rate": 1.0091125732962396e-05, "loss": 6.9688, "step": 29520 }, { "epoch": 0.14955863808322825, "grad_norm": 0.05465933680534363, "learning_rate": 1.007241517288938e-05, "loss": 6.9685, "step": 29530 }, { "epoch": 0.1498108448928121, "grad_norm": 0.06136702746152878, "learning_rate": 1.005371847502155e-05, "loss": 6.9674, "step": 29540 }, { "epoch": 0.15006305170239598, "grad_norm": 0.07016170769929886, "learning_rate": 1.003503565236485e-05, "loss": 6.9773, "step": 29550 }, { "epoch": 0.15031525851197983, "grad_norm": 0.06852513551712036, "learning_rate": 1.0016366717915614e-05, "loss": 6.9811, "step": 29560 }, { "epoch": 0.15056746532156368, "grad_norm": 0.05556391924619675, "learning_rate": 9.99771168466048e-06, "loss": 6.9761, "step": 29570 }, { "epoch": 0.15081967213114755, "grad_norm": 0.051703982055187225, "learning_rate": 9.979070565576432e-06, "loss": 6.9714, "step": 29580 }, { "epoch": 0.1510718789407314, "grad_norm": 0.056767888367176056, "learning_rate": 9.960443373630763e-06, "loss": 6.9697, "step": 29590 }, { "epoch": 0.15132408575031525, "grad_norm": 0.05385768786072731, "learning_rate": 9.941830121781106e-06, "loss": 6.9696, "step": 29600 }, { "epoch": 0.15157629255989913, "grad_norm": 0.07300745695829391, "learning_rate": 9.92323082297537e-06, "loss": 6.9785, "step": 29610 }, { "epoch": 0.15182849936948298, "grad_norm": 0.07077789306640625, "learning_rate": 9.90464549015176e-06, "loss": 6.9781, "step": 29620 }, { "epoch": 0.15208070617906683, "grad_norm": 0.05975412577390671, "learning_rate": 9.886074136238782e-06, "loss": 6.9775, "step": 29630 }, { "epoch": 0.1523329129886507, "grad_norm": 0.0676388069987297, "learning_rate": 9.867516774155188e-06, "loss": 6.9815, "step": 29640 }, { "epoch": 0.15258511979823455, "grad_norm": 0.06632721424102783, "learning_rate": 9.848973416810042e-06, "loss": 6.9818, "step": 29650 }, { "epoch": 0.1528373266078184, "grad_norm": 0.06231981888413429, "learning_rate": 9.830444077102618e-06, "loss": 6.979, "step": 29660 }, { "epoch": 0.15308953341740228, "grad_norm": 0.06450969725847244, "learning_rate": 9.811928767922492e-06, "loss": 6.9791, "step": 29670 }, { "epoch": 0.15334174022698613, "grad_norm": 0.060906462371349335, "learning_rate": 9.79342750214941e-06, "loss": 6.9794, "step": 29680 }, { "epoch": 0.15359394703656998, "grad_norm": 0.06567589193582535, "learning_rate": 9.77494029265342e-06, "loss": 6.9759, "step": 29690 }, { "epoch": 0.15384615384615385, "grad_norm": 0.06196463108062744, "learning_rate": 9.756467152294735e-06, "loss": 6.976, "step": 29700 }, { "epoch": 0.1540983606557377, "grad_norm": 0.03907887265086174, "learning_rate": 9.738008093923832e-06, "loss": 6.9591, "step": 29710 }, { "epoch": 0.15435056746532155, "grad_norm": 0.058961715549230576, "learning_rate": 9.719563130381352e-06, "loss": 6.968, "step": 29720 }, { "epoch": 0.15460277427490543, "grad_norm": 0.07355063408613205, "learning_rate": 9.701132274498149e-06, "loss": 6.9754, "step": 29730 }, { "epoch": 0.15485498108448928, "grad_norm": 0.06694003939628601, "learning_rate": 9.682715539095258e-06, "loss": 6.9827, "step": 29740 }, { "epoch": 0.15510718789407313, "grad_norm": 0.06267250329256058, "learning_rate": 9.664312936983888e-06, "loss": 6.9779, "step": 29750 }, { "epoch": 0.155359394703657, "grad_norm": 0.06339376419782639, "learning_rate": 9.645924480965437e-06, "loss": 6.9766, "step": 29760 }, { "epoch": 0.15561160151324085, "grad_norm": 0.06802881509065628, "learning_rate": 9.62755018383144e-06, "loss": 6.9795, "step": 29770 }, { "epoch": 0.1558638083228247, "grad_norm": 0.054249074310064316, "learning_rate": 9.609190058363591e-06, "loss": 6.9734, "step": 29780 }, { "epoch": 0.15611601513240858, "grad_norm": 0.054911885410547256, "learning_rate": 9.590844117333711e-06, "loss": 6.9632, "step": 29790 }, { "epoch": 0.15636822194199243, "grad_norm": 0.04902892932295799, "learning_rate": 9.572512373503794e-06, "loss": 6.9637, "step": 29800 }, { "epoch": 0.15662042875157628, "grad_norm": 0.062361959367990494, "learning_rate": 9.554194839625908e-06, "loss": 6.9772, "step": 29810 }, { "epoch": 0.15687263556116016, "grad_norm": 0.06302661448717117, "learning_rate": 9.53589152844228e-06, "loss": 6.9701, "step": 29820 }, { "epoch": 0.157124842370744, "grad_norm": 0.061259955167770386, "learning_rate": 9.517602452685218e-06, "loss": 6.9757, "step": 29830 }, { "epoch": 0.15737704918032788, "grad_norm": 0.057429857552051544, "learning_rate": 9.499327625077124e-06, "loss": 6.9732, "step": 29840 }, { "epoch": 0.15762925598991173, "grad_norm": 0.04756232723593712, "learning_rate": 9.481067058330493e-06, "loss": 6.9642, "step": 29850 }, { "epoch": 0.15788146279949558, "grad_norm": 0.05722454562783241, "learning_rate": 9.462820765147928e-06, "loss": 6.9698, "step": 29860 }, { "epoch": 0.15813366960907946, "grad_norm": 0.05582039803266525, "learning_rate": 9.444588758222058e-06, "loss": 6.9713, "step": 29870 }, { "epoch": 0.1583858764186633, "grad_norm": 0.07018125802278519, "learning_rate": 9.426371050235605e-06, "loss": 6.9753, "step": 29880 }, { "epoch": 0.15863808322824716, "grad_norm": 0.062229931354522705, "learning_rate": 9.408167653861335e-06, "loss": 6.9816, "step": 29890 }, { "epoch": 0.15889029003783103, "grad_norm": 0.060465723276138306, "learning_rate": 9.389978581762038e-06, "loss": 6.9765, "step": 29900 }, { "epoch": 0.15914249684741488, "grad_norm": 0.06729966402053833, "learning_rate": 9.371803846590585e-06, "loss": 6.9779, "step": 29910 }, { "epoch": 0.15939470365699873, "grad_norm": 0.07039695233106613, "learning_rate": 9.353643460989834e-06, "loss": 6.9819, "step": 29920 }, { "epoch": 0.1596469104665826, "grad_norm": 0.07056126743555069, "learning_rate": 9.335497437592695e-06, "loss": 6.9823, "step": 29930 }, { "epoch": 0.15989911727616646, "grad_norm": 0.03534773364663124, "learning_rate": 9.317365789022032e-06, "loss": 6.9604, "step": 29940 }, { "epoch": 0.1601513240857503, "grad_norm": 0.05823979154229164, "learning_rate": 9.299248527890776e-06, "loss": 6.9664, "step": 29950 }, { "epoch": 0.16040353089533418, "grad_norm": 0.06887295842170715, "learning_rate": 9.281145666801798e-06, "loss": 6.975, "step": 29960 }, { "epoch": 0.16065573770491803, "grad_norm": 0.06578003615140915, "learning_rate": 9.263057218347989e-06, "loss": 6.9778, "step": 29970 }, { "epoch": 0.16090794451450188, "grad_norm": 0.054675497114658356, "learning_rate": 9.244983195112193e-06, "loss": 6.976, "step": 29980 }, { "epoch": 0.16116015132408576, "grad_norm": 0.06414001435041428, "learning_rate": 9.22692360966722e-06, "loss": 6.9797, "step": 29990 }, { "epoch": 0.1614123581336696, "grad_norm": 0.06170126423239708, "learning_rate": 9.208878474575844e-06, "loss": 6.9789, "step": 30000 }, { "epoch": 0.16166456494325346, "grad_norm": 0.062035880982875824, "learning_rate": 9.19265021842096e-06, "loss": 6.9791, "step": 30010 }, { "epoch": 0.16191677175283734, "grad_norm": 0.06380894780158997, "learning_rate": 9.174632573575827e-06, "loss": 6.9742, "step": 30020 }, { "epoch": 0.16216897856242118, "grad_norm": 0.06448207050561905, "learning_rate": 9.15662941545946e-06, "loss": 6.9738, "step": 30030 }, { "epoch": 0.16242118537200503, "grad_norm": 0.0658341720700264, "learning_rate": 9.138640756595369e-06, "loss": 6.9779, "step": 30040 }, { "epoch": 0.1626733921815889, "grad_norm": 0.05998600274324417, "learning_rate": 9.12066660949698e-06, "loss": 6.9753, "step": 30050 }, { "epoch": 0.16292559899117276, "grad_norm": 0.05327016860246658, "learning_rate": 9.102706986667623e-06, "loss": 6.9705, "step": 30060 }, { "epoch": 0.1631778058007566, "grad_norm": 0.04617176204919815, "learning_rate": 9.084761900600552e-06, "loss": 6.9679, "step": 30070 }, { "epoch": 0.16343001261034049, "grad_norm": 0.04517354816198349, "learning_rate": 9.066831363778871e-06, "loss": 6.9658, "step": 30080 }, { "epoch": 0.16368221941992434, "grad_norm": 0.06277396529912949, "learning_rate": 9.048915388675596e-06, "loss": 6.9743, "step": 30090 }, { "epoch": 0.16393442622950818, "grad_norm": 0.05813482776284218, "learning_rate": 9.031013987753593e-06, "loss": 6.9731, "step": 30100 }, { "epoch": 0.16418663303909206, "grad_norm": 0.06531252712011337, "learning_rate": 9.013127173465592e-06, "loss": 6.9782, "step": 30110 }, { "epoch": 0.1644388398486759, "grad_norm": 0.07056690007448196, "learning_rate": 8.995254958254178e-06, "loss": 6.9818, "step": 30120 }, { "epoch": 0.16469104665825976, "grad_norm": 0.06524289399385452, "learning_rate": 8.977397354551773e-06, "loss": 6.9827, "step": 30130 }, { "epoch": 0.16494325346784364, "grad_norm": 0.058715321123600006, "learning_rate": 8.959554374780663e-06, "loss": 6.9769, "step": 30140 }, { "epoch": 0.16519546027742749, "grad_norm": 0.05250691995024681, "learning_rate": 8.941726031352935e-06, "loss": 6.9705, "step": 30150 }, { "epoch": 0.16544766708701136, "grad_norm": 0.05085649713873863, "learning_rate": 8.923912336670494e-06, "loss": 6.9679, "step": 30160 }, { "epoch": 0.1656998738965952, "grad_norm": 0.05026950687170029, "learning_rate": 8.90611330312506e-06, "loss": 6.9662, "step": 30170 }, { "epoch": 0.16595208070617906, "grad_norm": 0.056126948446035385, "learning_rate": 8.88832894309817e-06, "loss": 6.9729, "step": 30180 }, { "epoch": 0.16620428751576294, "grad_norm": 0.06393422931432724, "learning_rate": 8.870559268961125e-06, "loss": 6.9745, "step": 30190 }, { "epoch": 0.1664564943253468, "grad_norm": 0.06456936150789261, "learning_rate": 8.852804293075048e-06, "loss": 6.9786, "step": 30200 }, { "epoch": 0.16670870113493064, "grad_norm": 0.05731337517499924, "learning_rate": 8.835064027790783e-06, "loss": 6.9753, "step": 30210 }, { "epoch": 0.16696090794451451, "grad_norm": 0.06573493033647537, "learning_rate": 8.817338485448995e-06, "loss": 6.9763, "step": 30220 }, { "epoch": 0.16721311475409836, "grad_norm": 0.06847917288541794, "learning_rate": 8.799627678380068e-06, "loss": 6.9777, "step": 30230 }, { "epoch": 0.1674653215636822, "grad_norm": 0.06776108592748642, "learning_rate": 8.781931618904174e-06, "loss": 6.9782, "step": 30240 }, { "epoch": 0.1677175283732661, "grad_norm": 0.06438010185956955, "learning_rate": 8.76425031933119e-06, "loss": 6.9796, "step": 30250 }, { "epoch": 0.16796973518284994, "grad_norm": 0.058881960809230804, "learning_rate": 8.746583791960746e-06, "loss": 6.9768, "step": 30260 }, { "epoch": 0.1682219419924338, "grad_norm": 0.05034027248620987, "learning_rate": 8.728932049082186e-06, "loss": 6.9714, "step": 30270 }, { "epoch": 0.16847414880201766, "grad_norm": 0.04455617442727089, "learning_rate": 8.711295102974563e-06, "loss": 6.9638, "step": 30280 }, { "epoch": 0.16872635561160151, "grad_norm": 0.04192113131284714, "learning_rate": 8.693672965906669e-06, "loss": 6.963, "step": 30290 }, { "epoch": 0.16897856242118536, "grad_norm": 0.059737738221883774, "learning_rate": 8.676065650136961e-06, "loss": 6.9681, "step": 30300 }, { "epoch": 0.16923076923076924, "grad_norm": 0.0651627853512764, "learning_rate": 8.658473167913604e-06, "loss": 6.9751, "step": 30310 }, { "epoch": 0.1694829760403531, "grad_norm": 0.06486261636018753, "learning_rate": 8.640895531474421e-06, "loss": 6.9809, "step": 30320 }, { "epoch": 0.16973518284993694, "grad_norm": 0.05839043855667114, "learning_rate": 8.623332753046946e-06, "loss": 6.9741, "step": 30330 }, { "epoch": 0.16998738965952082, "grad_norm": 0.059384074062108994, "learning_rate": 8.605784844848344e-06, "loss": 6.9722, "step": 30340 }, { "epoch": 0.17023959646910466, "grad_norm": 0.05148756504058838, "learning_rate": 8.588251819085457e-06, "loss": 6.9714, "step": 30350 }, { "epoch": 0.17049180327868851, "grad_norm": 0.056326452642679214, "learning_rate": 8.570733687954772e-06, "loss": 6.97, "step": 30360 }, { "epoch": 0.1707440100882724, "grad_norm": 0.0638163611292839, "learning_rate": 8.553230463642378e-06, "loss": 6.9722, "step": 30370 }, { "epoch": 0.17099621689785624, "grad_norm": 0.07021261006593704, "learning_rate": 8.535742158324056e-06, "loss": 6.9811, "step": 30380 }, { "epoch": 0.1712484237074401, "grad_norm": 0.06854080408811569, "learning_rate": 8.518268784165152e-06, "loss": 6.9819, "step": 30390 }, { "epoch": 0.17150063051702397, "grad_norm": 0.05177253112196922, "learning_rate": 8.500810353320679e-06, "loss": 6.9777, "step": 30400 }, { "epoch": 0.17175283732660782, "grad_norm": 0.061495162546634674, "learning_rate": 8.48336687793521e-06, "loss": 6.972, "step": 30410 }, { "epoch": 0.17200504413619166, "grad_norm": 0.05669122189283371, "learning_rate": 8.465938370142936e-06, "loss": 6.9717, "step": 30420 }, { "epoch": 0.17225725094577554, "grad_norm": 0.051975104957818985, "learning_rate": 8.448524842067623e-06, "loss": 6.9668, "step": 30430 }, { "epoch": 0.1725094577553594, "grad_norm": 0.06169789284467697, "learning_rate": 8.431126305822642e-06, "loss": 6.9741, "step": 30440 }, { "epoch": 0.17276166456494324, "grad_norm": 0.06327860802412033, "learning_rate": 8.413742773510898e-06, "loss": 6.9738, "step": 30450 }, { "epoch": 0.17301387137452712, "grad_norm": 0.059832457453012466, "learning_rate": 8.396374257224913e-06, "loss": 6.975, "step": 30460 }, { "epoch": 0.17326607818411097, "grad_norm": 0.06461579352617264, "learning_rate": 8.379020769046685e-06, "loss": 6.978, "step": 30470 }, { "epoch": 0.17351828499369484, "grad_norm": 0.0651279017329216, "learning_rate": 8.361682321047841e-06, "loss": 6.9803, "step": 30480 }, { "epoch": 0.1737704918032787, "grad_norm": 0.05948614329099655, "learning_rate": 8.34435892528949e-06, "loss": 6.979, "step": 30490 }, { "epoch": 0.17402269861286254, "grad_norm": 0.06052026152610779, "learning_rate": 8.32705059382228e-06, "loss": 6.9747, "step": 30500 }, { "epoch": 0.17427490542244642, "grad_norm": 0.05107749253511429, "learning_rate": 8.30975733868641e-06, "loss": 6.97, "step": 30510 }, { "epoch": 0.17452711223203027, "grad_norm": 0.06992510706186295, "learning_rate": 8.292479171911559e-06, "loss": 6.968, "step": 30520 }, { "epoch": 0.17477931904161412, "grad_norm": 0.06405752897262573, "learning_rate": 8.275216105516919e-06, "loss": 6.979, "step": 30530 }, { "epoch": 0.175031525851198, "grad_norm": 0.073086678981781, "learning_rate": 8.257968151511167e-06, "loss": 6.9733, "step": 30540 }, { "epoch": 0.17528373266078184, "grad_norm": 0.06459171324968338, "learning_rate": 8.240735321892501e-06, "loss": 6.9781, "step": 30550 }, { "epoch": 0.1755359394703657, "grad_norm": 0.06880571693181992, "learning_rate": 8.223517628648568e-06, "loss": 6.9807, "step": 30560 }, { "epoch": 0.17578814627994957, "grad_norm": 0.06173074617981911, "learning_rate": 8.206315083756492e-06, "loss": 6.9799, "step": 30570 }, { "epoch": 0.17604035308953342, "grad_norm": 0.0644906684756279, "learning_rate": 8.189127699182853e-06, "loss": 6.9776, "step": 30580 }, { "epoch": 0.17629255989911727, "grad_norm": 0.059776581823825836, "learning_rate": 8.171955486883712e-06, "loss": 6.9766, "step": 30590 }, { "epoch": 0.17654476670870115, "grad_norm": 0.06555065512657166, "learning_rate": 8.154798458804539e-06, "loss": 6.978, "step": 30600 }, { "epoch": 0.176796973518285, "grad_norm": 0.06159249693155289, "learning_rate": 8.137656626880278e-06, "loss": 6.9765, "step": 30610 }, { "epoch": 0.17704918032786884, "grad_norm": 0.05706027150154114, "learning_rate": 8.120530003035285e-06, "loss": 6.975, "step": 30620 }, { "epoch": 0.17730138713745272, "grad_norm": 0.05615711212158203, "learning_rate": 8.103418599183303e-06, "loss": 6.9561, "step": 30630 }, { "epoch": 0.17755359394703657, "grad_norm": 0.035007912665605545, "learning_rate": 8.086322427227553e-06, "loss": 6.9594, "step": 30640 }, { "epoch": 0.17780580075662042, "grad_norm": 0.05317353829741478, "learning_rate": 8.069241499060607e-06, "loss": 6.9666, "step": 30650 }, { "epoch": 0.1780580075662043, "grad_norm": 0.054341528564691544, "learning_rate": 8.052175826564475e-06, "loss": 6.9698, "step": 30660 }, { "epoch": 0.17831021437578815, "grad_norm": 0.0566815510392189, "learning_rate": 8.035125421610514e-06, "loss": 6.9707, "step": 30670 }, { "epoch": 0.178562421185372, "grad_norm": 0.06741873174905777, "learning_rate": 8.018090296059492e-06, "loss": 6.9743, "step": 30680 }, { "epoch": 0.17881462799495587, "grad_norm": 0.06787259131669998, "learning_rate": 8.00107046176152e-06, "loss": 6.9802, "step": 30690 }, { "epoch": 0.17906683480453972, "grad_norm": 0.06383650749921799, "learning_rate": 7.984065930556108e-06, "loss": 6.9798, "step": 30700 }, { "epoch": 0.17931904161412357, "grad_norm": 0.06416811794042587, "learning_rate": 7.967076714272081e-06, "loss": 6.9791, "step": 30710 }, { "epoch": 0.17957124842370745, "grad_norm": 0.05514564737677574, "learning_rate": 7.950102824727657e-06, "loss": 6.9765, "step": 30720 }, { "epoch": 0.1798234552332913, "grad_norm": 0.05118343606591225, "learning_rate": 7.933144273730336e-06, "loss": 6.9724, "step": 30730 }, { "epoch": 0.18007566204287515, "grad_norm": 0.06379509717226028, "learning_rate": 7.91620107307698e-06, "loss": 6.9727, "step": 30740 }, { "epoch": 0.18032786885245902, "grad_norm": 0.04483097791671753, "learning_rate": 7.89927323455379e-06, "loss": 6.9685, "step": 30750 }, { "epoch": 0.18058007566204287, "grad_norm": 0.06064945086836815, "learning_rate": 7.88236076993624e-06, "loss": 6.9677, "step": 30760 }, { "epoch": 0.18083228247162672, "grad_norm": 0.05686468631029129, "learning_rate": 7.865463690989148e-06, "loss": 6.9731, "step": 30770 }, { "epoch": 0.1810844892812106, "grad_norm": 0.04676142707467079, "learning_rate": 7.848582009466603e-06, "loss": 6.9733, "step": 30780 }, { "epoch": 0.18133669609079445, "grad_norm": 0.06126236170530319, "learning_rate": 7.831715737111992e-06, "loss": 6.9716, "step": 30790 }, { "epoch": 0.18158890290037832, "grad_norm": 0.06611207127571106, "learning_rate": 7.814864885657973e-06, "loss": 6.9793, "step": 30800 }, { "epoch": 0.18184110970996217, "grad_norm": 0.06162312254309654, "learning_rate": 7.798029466826506e-06, "loss": 6.9783, "step": 30810 }, { "epoch": 0.18209331651954602, "grad_norm": 0.06576896458864212, "learning_rate": 7.781209492328784e-06, "loss": 6.98, "step": 30820 }, { "epoch": 0.1823455233291299, "grad_norm": 0.0684521496295929, "learning_rate": 7.76440497386527e-06, "loss": 6.9789, "step": 30830 }, { "epoch": 0.18259773013871375, "grad_norm": 0.07029572874307632, "learning_rate": 7.747615923125669e-06, "loss": 6.9832, "step": 30840 }, { "epoch": 0.1828499369482976, "grad_norm": 0.06771589070558548, "learning_rate": 7.73084235178894e-06, "loss": 6.9818, "step": 30850 }, { "epoch": 0.18310214375788147, "grad_norm": 0.06763351708650589, "learning_rate": 7.714084271523263e-06, "loss": 6.9805, "step": 30860 }, { "epoch": 0.18335435056746532, "grad_norm": 0.056547246873378754, "learning_rate": 7.69734169398603e-06, "loss": 6.9694, "step": 30870 }, { "epoch": 0.18360655737704917, "grad_norm": 0.0557728111743927, "learning_rate": 7.680614630823897e-06, "loss": 6.9674, "step": 30880 }, { "epoch": 0.18385876418663305, "grad_norm": 0.0558755062520504, "learning_rate": 7.66390309367265e-06, "loss": 6.9669, "step": 30890 }, { "epoch": 0.1841109709962169, "grad_norm": 0.06504415720701218, "learning_rate": 7.64720709415735e-06, "loss": 6.9778, "step": 30900 }, { "epoch": 0.18436317780580075, "grad_norm": 0.053141556680202484, "learning_rate": 7.630526643892199e-06, "loss": 6.9765, "step": 30910 }, { "epoch": 0.18461538461538463, "grad_norm": 0.05980955809354782, "learning_rate": 7.613861754480619e-06, "loss": 6.9746, "step": 30920 }, { "epoch": 0.18486759142496847, "grad_norm": 0.05839427933096886, "learning_rate": 7.597212437515179e-06, "loss": 6.9745, "step": 30930 }, { "epoch": 0.18511979823455232, "grad_norm": 0.05541177839040756, "learning_rate": 7.58057870457763e-06, "loss": 6.9749, "step": 30940 }, { "epoch": 0.1853720050441362, "grad_norm": 0.052206818014383316, "learning_rate": 7.563960567238864e-06, "loss": 6.9703, "step": 30950 }, { "epoch": 0.18562421185372005, "grad_norm": 0.06440428644418716, "learning_rate": 7.547358037058962e-06, "loss": 6.9704, "step": 30960 }, { "epoch": 0.1858764186633039, "grad_norm": 0.059852566570043564, "learning_rate": 7.5307711255871016e-06, "loss": 6.9788, "step": 30970 }, { "epoch": 0.18612862547288778, "grad_norm": 0.07064235210418701, "learning_rate": 7.514199844361651e-06, "loss": 6.9779, "step": 30980 }, { "epoch": 0.18638083228247163, "grad_norm": 0.06901508569717407, "learning_rate": 7.4976442049100436e-06, "loss": 6.983, "step": 30990 }, { "epoch": 0.18663303909205547, "grad_norm": 0.06850708276033401, "learning_rate": 7.481104218748863e-06, "loss": 6.9824, "step": 31000 }, { "epoch": 0.18688524590163935, "grad_norm": 0.07057926058769226, "learning_rate": 7.46457989738382e-06, "loss": 6.9815, "step": 31010 }, { "epoch": 0.1871374527112232, "grad_norm": 0.06959396600723267, "learning_rate": 7.4480712523096916e-06, "loss": 6.9829, "step": 31020 }, { "epoch": 0.18738965952080705, "grad_norm": 0.06896081566810608, "learning_rate": 7.4315782950103896e-06, "loss": 6.9807, "step": 31030 }, { "epoch": 0.18764186633039093, "grad_norm": 0.06889224797487259, "learning_rate": 7.415101036958882e-06, "loss": 6.98, "step": 31040 }, { "epoch": 0.18789407313997478, "grad_norm": 0.05684029310941696, "learning_rate": 7.398639489617228e-06, "loss": 6.9768, "step": 31050 }, { "epoch": 0.18814627994955863, "grad_norm": 0.05481770262122154, "learning_rate": 7.382193664436551e-06, "loss": 6.9717, "step": 31060 }, { "epoch": 0.1883984867591425, "grad_norm": 0.0547737181186676, "learning_rate": 7.365763572857055e-06, "loss": 6.9687, "step": 31070 }, { "epoch": 0.18865069356872635, "grad_norm": 0.042300041764974594, "learning_rate": 7.349349226307978e-06, "loss": 6.9675, "step": 31080 }, { "epoch": 0.1889029003783102, "grad_norm": 0.03221592307090759, "learning_rate": 7.332950636207625e-06, "loss": 6.9475, "step": 31090 }, { "epoch": 0.18915510718789408, "grad_norm": 0.05001509562134743, "learning_rate": 7.316567813963318e-06, "loss": 6.961, "step": 31100 }, { "epoch": 0.18940731399747793, "grad_norm": 0.06375488638877869, "learning_rate": 7.300200770971423e-06, "loss": 6.9714, "step": 31110 }, { "epoch": 0.1896595208070618, "grad_norm": 0.06647028774023056, "learning_rate": 7.2838495186173395e-06, "loss": 6.9794, "step": 31120 }, { "epoch": 0.18991172761664565, "grad_norm": 0.061275992542505264, "learning_rate": 7.26751406827546e-06, "loss": 6.9773, "step": 31130 }, { "epoch": 0.1901639344262295, "grad_norm": 0.06919791549444199, "learning_rate": 7.251194431309224e-06, "loss": 6.9814, "step": 31140 }, { "epoch": 0.19041614123581338, "grad_norm": 0.062045689672231674, "learning_rate": 7.234890619071002e-06, "loss": 6.9764, "step": 31150 }, { "epoch": 0.19066834804539723, "grad_norm": 0.06436080485582352, "learning_rate": 7.218602642902232e-06, "loss": 6.9777, "step": 31160 }, { "epoch": 0.19092055485498108, "grad_norm": 0.05783155933022499, "learning_rate": 7.202330514133278e-06, "loss": 6.9767, "step": 31170 }, { "epoch": 0.19117276166456496, "grad_norm": 0.0635487511754036, "learning_rate": 7.186074244083526e-06, "loss": 6.9729, "step": 31180 }, { "epoch": 0.1914249684741488, "grad_norm": 0.06713855266571045, "learning_rate": 7.169833844061299e-06, "loss": 6.9764, "step": 31190 }, { "epoch": 0.19167717528373265, "grad_norm": 0.06779792904853821, "learning_rate": 7.153609325363883e-06, "loss": 6.9821, "step": 31200 }, { "epoch": 0.19192938209331653, "grad_norm": 0.06113565340638161, "learning_rate": 7.137400699277525e-06, "loss": 6.9773, "step": 31210 }, { "epoch": 0.19218158890290038, "grad_norm": 0.062273088842630386, "learning_rate": 7.121207977077425e-06, "loss": 6.9718, "step": 31220 }, { "epoch": 0.19243379571248423, "grad_norm": 0.05473291873931885, "learning_rate": 7.105031170027706e-06, "loss": 6.9743, "step": 31230 }, { "epoch": 0.1926860025220681, "grad_norm": 0.05977165699005127, "learning_rate": 7.0888702893814165e-06, "loss": 6.9725, "step": 31240 }, { "epoch": 0.19293820933165196, "grad_norm": 0.04697752371430397, "learning_rate": 7.07272534638054e-06, "loss": 6.9706, "step": 31250 }, { "epoch": 0.1931904161412358, "grad_norm": 0.03945561870932579, "learning_rate": 7.056596352255953e-06, "loss": 6.9536, "step": 31260 }, { "epoch": 0.19344262295081968, "grad_norm": 0.05270116776227951, "learning_rate": 7.040483318227471e-06, "loss": 6.9644, "step": 31270 }, { "epoch": 0.19369482976040353, "grad_norm": 0.06354660540819168, "learning_rate": 7.024386255503771e-06, "loss": 6.9719, "step": 31280 }, { "epoch": 0.19394703656998738, "grad_norm": 0.05724763870239258, "learning_rate": 7.008305175282449e-06, "loss": 6.973, "step": 31290 }, { "epoch": 0.19419924337957126, "grad_norm": 0.05416691675782204, "learning_rate": 6.992240088749964e-06, "loss": 6.9702, "step": 31300 }, { "epoch": 0.1944514501891551, "grad_norm": 0.05254286900162697, "learning_rate": 6.976191007081656e-06, "loss": 6.969, "step": 31310 }, { "epoch": 0.19470365699873896, "grad_norm": 0.061290886253118515, "learning_rate": 6.960157941441721e-06, "loss": 6.9728, "step": 31320 }, { "epoch": 0.19495586380832283, "grad_norm": 0.054606467485427856, "learning_rate": 6.944140902983242e-06, "loss": 6.9709, "step": 31330 }, { "epoch": 0.19520807061790668, "grad_norm": 0.056859858334064484, "learning_rate": 6.92813990284813e-06, "loss": 6.9672, "step": 31340 }, { "epoch": 0.19546027742749053, "grad_norm": 0.05713654309511185, "learning_rate": 6.912154952167135e-06, "loss": 6.9704, "step": 31350 }, { "epoch": 0.1957124842370744, "grad_norm": 0.055883701890707016, "learning_rate": 6.89618606205986e-06, "loss": 6.9712, "step": 31360 }, { "epoch": 0.19596469104665826, "grad_norm": 0.06999486684799194, "learning_rate": 6.880233243634715e-06, "loss": 6.9752, "step": 31370 }, { "epoch": 0.1962168978562421, "grad_norm": 0.06820958852767944, "learning_rate": 6.864296507988962e-06, "loss": 6.9821, "step": 31380 }, { "epoch": 0.19646910466582598, "grad_norm": 0.06137564405798912, "learning_rate": 6.848375866208636e-06, "loss": 6.9795, "step": 31390 }, { "epoch": 0.19672131147540983, "grad_norm": 0.061071570962667465, "learning_rate": 6.832471329368621e-06, "loss": 6.9792, "step": 31400 }, { "epoch": 0.19697351828499368, "grad_norm": 0.06144832819700241, "learning_rate": 6.816582908532545e-06, "loss": 6.9776, "step": 31410 }, { "epoch": 0.19722572509457756, "grad_norm": 0.06083881855010986, "learning_rate": 6.800710614752876e-06, "loss": 6.9763, "step": 31420 }, { "epoch": 0.1974779319041614, "grad_norm": 0.06800564378499985, "learning_rate": 6.784854459070824e-06, "loss": 6.9813, "step": 31430 }, { "epoch": 0.19773013871374528, "grad_norm": 0.06203644722700119, "learning_rate": 6.7690144525164074e-06, "loss": 6.9737, "step": 31440 }, { "epoch": 0.19798234552332913, "grad_norm": 0.06780432909727097, "learning_rate": 6.7531906061083855e-06, "loss": 6.9785, "step": 31450 }, { "epoch": 0.19823455233291298, "grad_norm": 0.06439286470413208, "learning_rate": 6.737382930854283e-06, "loss": 6.9786, "step": 31460 }, { "epoch": 0.19848675914249686, "grad_norm": 0.06167054548859596, "learning_rate": 6.7215914377503775e-06, "loss": 6.9752, "step": 31470 }, { "epoch": 0.1987389659520807, "grad_norm": 0.06988530606031418, "learning_rate": 6.705816137781681e-06, "loss": 6.9803, "step": 31480 }, { "epoch": 0.19899117276166456, "grad_norm": 0.06443489342927933, "learning_rate": 6.690057041921965e-06, "loss": 6.9806, "step": 31490 }, { "epoch": 0.19924337957124844, "grad_norm": 0.04627808555960655, "learning_rate": 6.674314161133706e-06, "loss": 6.9745, "step": 31500 }, { "epoch": 0.19949558638083228, "grad_norm": 0.04392343759536743, "learning_rate": 6.658587506368108e-06, "loss": 6.9633, "step": 31510 }, { "epoch": 0.19974779319041613, "grad_norm": 0.04319094121456146, "learning_rate": 6.642877088565075e-06, "loss": 6.9602, "step": 31520 }, { "epoch": 0.2, "grad_norm": 0.05942687392234802, "learning_rate": 6.62718291865325e-06, "loss": 6.9644, "step": 31530 }, { "epoch": 0.20025220680958386, "grad_norm": 0.06282245367765427, "learning_rate": 6.611505007549933e-06, "loss": 6.9763, "step": 31540 }, { "epoch": 0.2005044136191677, "grad_norm": 0.059174295514822006, "learning_rate": 6.595843366161151e-06, "loss": 6.9751, "step": 31550 }, { "epoch": 0.2007566204287516, "grad_norm": 0.0559154711663723, "learning_rate": 6.580198005381589e-06, "loss": 6.9714, "step": 31560 }, { "epoch": 0.20100882723833544, "grad_norm": 0.06104200333356857, "learning_rate": 6.564568936094609e-06, "loss": 6.9714, "step": 31570 }, { "epoch": 0.20126103404791928, "grad_norm": 0.0652538537979126, "learning_rate": 6.548956169172237e-06, "loss": 6.9765, "step": 31580 }, { "epoch": 0.20151324085750316, "grad_norm": 0.06293690204620361, "learning_rate": 6.5333597154751834e-06, "loss": 6.9763, "step": 31590 }, { "epoch": 0.201765447667087, "grad_norm": 0.04876553639769554, "learning_rate": 6.517779585852785e-06, "loss": 6.9749, "step": 31600 }, { "epoch": 0.20201765447667086, "grad_norm": 0.0611407496035099, "learning_rate": 6.502215791143031e-06, "loss": 6.9715, "step": 31610 }, { "epoch": 0.20226986128625474, "grad_norm": 0.07177555561065674, "learning_rate": 6.486668342172547e-06, "loss": 6.9825, "step": 31620 }, { "epoch": 0.2025220680958386, "grad_norm": 0.06085026264190674, "learning_rate": 6.4711372497565805e-06, "loss": 6.975, "step": 31630 }, { "epoch": 0.20277427490542244, "grad_norm": 0.05871787667274475, "learning_rate": 6.455622524699029e-06, "loss": 6.9776, "step": 31640 }, { "epoch": 0.2030264817150063, "grad_norm": 0.06210998073220253, "learning_rate": 6.440124177792362e-06, "loss": 6.9758, "step": 31650 }, { "epoch": 0.20327868852459016, "grad_norm": 0.032115623354911804, "learning_rate": 6.424642219817708e-06, "loss": 6.9594, "step": 31660 }, { "epoch": 0.203530895334174, "grad_norm": 0.05357208102941513, "learning_rate": 6.409176661544733e-06, "loss": 6.9639, "step": 31670 }, { "epoch": 0.2037831021437579, "grad_norm": 0.06644190102815628, "learning_rate": 6.39372751373175e-06, "loss": 6.9712, "step": 31680 }, { "epoch": 0.20403530895334174, "grad_norm": 0.07067641615867615, "learning_rate": 6.3782947871256135e-06, "loss": 6.9822, "step": 31690 }, { "epoch": 0.2042875157629256, "grad_norm": 0.06323935836553574, "learning_rate": 6.362878492461801e-06, "loss": 6.9811, "step": 31700 }, { "epoch": 0.20453972257250946, "grad_norm": 0.06687301397323608, "learning_rate": 6.3474786404643174e-06, "loss": 6.979, "step": 31710 }, { "epoch": 0.2047919293820933, "grad_norm": 0.06567113846540451, "learning_rate": 6.332095241845745e-06, "loss": 6.9782, "step": 31720 }, { "epoch": 0.20504413619167716, "grad_norm": 0.06230836734175682, "learning_rate": 6.316728307307227e-06, "loss": 6.9757, "step": 31730 }, { "epoch": 0.20529634300126104, "grad_norm": 0.05514094606041908, "learning_rate": 6.3013778475384395e-06, "loss": 6.9701, "step": 31740 }, { "epoch": 0.2055485498108449, "grad_norm": 0.05933394655585289, "learning_rate": 6.2860438732176195e-06, "loss": 6.9715, "step": 31750 }, { "epoch": 0.20580075662042877, "grad_norm": 0.058787353336811066, "learning_rate": 6.270726395011517e-06, "loss": 6.9699, "step": 31760 }, { "epoch": 0.20605296343001261, "grad_norm": 0.053454313427209854, "learning_rate": 6.255425423575411e-06, "loss": 6.9687, "step": 31770 }, { "epoch": 0.20630517023959646, "grad_norm": 0.053490810096263885, "learning_rate": 6.2401409695530994e-06, "loss": 6.9715, "step": 31780 }, { "epoch": 0.20655737704918034, "grad_norm": 0.051408298313617706, "learning_rate": 6.224873043576903e-06, "loss": 6.966, "step": 31790 }, { "epoch": 0.2068095838587642, "grad_norm": 0.05438300594687462, "learning_rate": 6.209621656267615e-06, "loss": 6.9684, "step": 31800 }, { "epoch": 0.20706179066834804, "grad_norm": 0.05822885408997536, "learning_rate": 6.194386818234561e-06, "loss": 6.9716, "step": 31810 }, { "epoch": 0.20731399747793192, "grad_norm": 0.06761831790208817, "learning_rate": 6.179168540075528e-06, "loss": 6.976, "step": 31820 }, { "epoch": 0.20756620428751577, "grad_norm": 0.0659204050898552, "learning_rate": 6.16396683237679e-06, "loss": 6.9807, "step": 31830 }, { "epoch": 0.20781841109709961, "grad_norm": 0.061191294342279434, "learning_rate": 6.148781705713093e-06, "loss": 6.9795, "step": 31840 }, { "epoch": 0.2080706179066835, "grad_norm": 0.06701567023992538, "learning_rate": 6.133613170647644e-06, "loss": 6.9795, "step": 31850 }, { "epoch": 0.20832282471626734, "grad_norm": 0.06697458773851395, "learning_rate": 6.118461237732137e-06, "loss": 6.9807, "step": 31860 }, { "epoch": 0.2085750315258512, "grad_norm": 0.056996047496795654, "learning_rate": 6.10332591750668e-06, "loss": 6.9809, "step": 31870 }, { "epoch": 0.20882723833543507, "grad_norm": 0.05249195918440819, "learning_rate": 6.088207220499842e-06, "loss": 6.9688, "step": 31880 }, { "epoch": 0.20907944514501892, "grad_norm": 0.05485674738883972, "learning_rate": 6.073105157228622e-06, "loss": 6.968, "step": 31890 }, { "epoch": 0.20933165195460277, "grad_norm": 0.05312955006957054, "learning_rate": 6.0580197381984646e-06, "loss": 6.9682, "step": 31900 }, { "epoch": 0.20958385876418664, "grad_norm": 0.04824492335319519, "learning_rate": 6.042950973903219e-06, "loss": 6.9683, "step": 31910 }, { "epoch": 0.2098360655737705, "grad_norm": 0.05623697489500046, "learning_rate": 6.0278988748251484e-06, "loss": 6.9697, "step": 31920 }, { "epoch": 0.21008827238335434, "grad_norm": 0.057054486125707626, "learning_rate": 6.012863451434929e-06, "loss": 6.9696, "step": 31930 }, { "epoch": 0.21034047919293822, "grad_norm": 0.06178778409957886, "learning_rate": 5.99784471419165e-06, "loss": 6.9709, "step": 31940 }, { "epoch": 0.21059268600252207, "grad_norm": 0.061707284301519394, "learning_rate": 5.9828426735427624e-06, "loss": 6.9766, "step": 31950 }, { "epoch": 0.21084489281210592, "grad_norm": 0.06313606351613998, "learning_rate": 5.967857339924133e-06, "loss": 6.9758, "step": 31960 }, { "epoch": 0.2110970996216898, "grad_norm": 0.06585457921028137, "learning_rate": 5.952888723759995e-06, "loss": 6.9773, "step": 31970 }, { "epoch": 0.21134930643127364, "grad_norm": 0.05455554276704788, "learning_rate": 5.937936835462942e-06, "loss": 6.9725, "step": 31980 }, { "epoch": 0.2116015132408575, "grad_norm": 0.06830981373786926, "learning_rate": 5.923001685433947e-06, "loss": 6.9792, "step": 31990 }, { "epoch": 0.21185372005044137, "grad_norm": 0.06450837105512619, "learning_rate": 5.908083284062322e-06, "loss": 6.9797, "step": 32000 }, { "epoch": 0.21210592686002522, "grad_norm": 0.0632336437702179, "learning_rate": 5.894671051507309e-06, "loss": 6.9805, "step": 32010 }, { "epoch": 0.21235813366960907, "grad_norm": 0.0703238770365715, "learning_rate": 5.8797845011655615e-06, "loss": 6.9816, "step": 32020 }, { "epoch": 0.21261034047919294, "grad_norm": 0.06573357433080673, "learning_rate": 5.864914729544318e-06, "loss": 6.9803, "step": 32030 }, { "epoch": 0.2128625472887768, "grad_norm": 0.05523145943880081, "learning_rate": 5.850061746987422e-06, "loss": 6.9708, "step": 32040 }, { "epoch": 0.21311475409836064, "grad_norm": 0.053406111896038055, "learning_rate": 5.83522556382703e-06, "loss": 6.9669, "step": 32050 }, { "epoch": 0.21336696090794452, "grad_norm": 0.054765306413173676, "learning_rate": 5.820406190383641e-06, "loss": 6.9702, "step": 32060 }, { "epoch": 0.21361916771752837, "grad_norm": 0.06330165266990662, "learning_rate": 5.805603636966016e-06, "loss": 6.9773, "step": 32070 }, { "epoch": 0.21387137452711225, "grad_norm": 0.06910988688468933, "learning_rate": 5.79081791387126e-06, "loss": 6.982, "step": 32080 }, { "epoch": 0.2141235813366961, "grad_norm": 0.06009671837091446, "learning_rate": 5.776049031384743e-06, "loss": 6.9795, "step": 32090 }, { "epoch": 0.21437578814627994, "grad_norm": 0.04637699946761131, "learning_rate": 5.7612969997801275e-06, "loss": 6.9703, "step": 32100 }, { "epoch": 0.21462799495586382, "grad_norm": 0.0536806546151638, "learning_rate": 5.746561829319355e-06, "loss": 6.9652, "step": 32110 }, { "epoch": 0.21488020176544767, "grad_norm": 0.06089048832654953, "learning_rate": 5.731843530252626e-06, "loss": 6.9744, "step": 32120 }, { "epoch": 0.21513240857503152, "grad_norm": 0.05326372757554054, "learning_rate": 5.717142112818433e-06, "loss": 6.9757, "step": 32130 }, { "epoch": 0.2153846153846154, "grad_norm": 0.049554429948329926, "learning_rate": 5.702457587243498e-06, "loss": 6.9702, "step": 32140 }, { "epoch": 0.21563682219419925, "grad_norm": 0.059319525957107544, "learning_rate": 5.6877899637428065e-06, "loss": 6.9711, "step": 32150 }, { "epoch": 0.2158890290037831, "grad_norm": 0.049320850521326065, "learning_rate": 5.6731392525195705e-06, "loss": 6.9702, "step": 32160 }, { "epoch": 0.21614123581336697, "grad_norm": 0.06278186291456223, "learning_rate": 5.658505463765263e-06, "loss": 6.9727, "step": 32170 }, { "epoch": 0.21639344262295082, "grad_norm": 0.06552881747484207, "learning_rate": 5.643888607659563e-06, "loss": 6.9794, "step": 32180 }, { "epoch": 0.21664564943253467, "grad_norm": 0.06352954357862473, "learning_rate": 5.629288694370396e-06, "loss": 6.9785, "step": 32190 }, { "epoch": 0.21689785624211855, "grad_norm": 0.0654023066163063, "learning_rate": 5.614705734053857e-06, "loss": 6.9798, "step": 32200 }, { "epoch": 0.2171500630517024, "grad_norm": 0.05140543729066849, "learning_rate": 5.600139736854297e-06, "loss": 6.9755, "step": 32210 }, { "epoch": 0.21740226986128625, "grad_norm": 0.06290844827890396, "learning_rate": 5.585590712904236e-06, "loss": 6.9731, "step": 32220 }, { "epoch": 0.21765447667087012, "grad_norm": 0.030886249616742134, "learning_rate": 5.571058672324395e-06, "loss": 6.9583, "step": 32230 }, { "epoch": 0.21790668348045397, "grad_norm": 0.0432063490152359, "learning_rate": 5.5565436252236915e-06, "loss": 6.9599, "step": 32240 }, { "epoch": 0.21815889029003782, "grad_norm": 0.056561052799224854, "learning_rate": 5.542045581699204e-06, "loss": 6.9648, "step": 32250 }, { "epoch": 0.2184110970996217, "grad_norm": 0.05695755034685135, "learning_rate": 5.527564551836197e-06, "loss": 6.9707, "step": 32260 }, { "epoch": 0.21866330390920555, "grad_norm": 0.05327158421278, "learning_rate": 5.513100545708085e-06, "loss": 6.9746, "step": 32270 }, { "epoch": 0.2189155107187894, "grad_norm": 0.0544932521879673, "learning_rate": 5.498653573376463e-06, "loss": 6.9715, "step": 32280 }, { "epoch": 0.21916771752837327, "grad_norm": 0.06387484818696976, "learning_rate": 5.484223644891059e-06, "loss": 6.9717, "step": 32290 }, { "epoch": 0.21941992433795712, "grad_norm": 0.06473452597856522, "learning_rate": 5.469810770289746e-06, "loss": 6.9811, "step": 32300 }, { "epoch": 0.21967213114754097, "grad_norm": 0.06011480093002319, "learning_rate": 5.455414959598528e-06, "loss": 6.9795, "step": 32310 }, { "epoch": 0.21992433795712485, "grad_norm": 0.05469846352934837, "learning_rate": 5.441036222831568e-06, "loss": 6.9761, "step": 32320 }, { "epoch": 0.2201765447667087, "grad_norm": 0.057968974113464355, "learning_rate": 5.4266745699911146e-06, "loss": 6.9699, "step": 32330 }, { "epoch": 0.22042875157629255, "grad_norm": 0.0544721856713295, "learning_rate": 5.412330011067563e-06, "loss": 6.9717, "step": 32340 }, { "epoch": 0.22068095838587642, "grad_norm": 0.06649306416511536, "learning_rate": 5.398002556039405e-06, "loss": 6.9714, "step": 32350 }, { "epoch": 0.22093316519546027, "grad_norm": 0.06702875345945358, "learning_rate": 5.3836922148732085e-06, "loss": 6.9782, "step": 32360 }, { "epoch": 0.22118537200504412, "grad_norm": 0.06561864167451859, "learning_rate": 5.369398997523689e-06, "loss": 6.9777, "step": 32370 }, { "epoch": 0.221437578814628, "grad_norm": 0.04745205491781235, "learning_rate": 5.355122913933604e-06, "loss": 6.9751, "step": 32380 }, { "epoch": 0.22168978562421185, "grad_norm": 0.04248310625553131, "learning_rate": 5.340863974033824e-06, "loss": 6.9558, "step": 32390 }, { "epoch": 0.22194199243379573, "grad_norm": 0.049792494624853134, "learning_rate": 5.3266221877432766e-06, "loss": 6.9643, "step": 32400 }, { "epoch": 0.22219419924337958, "grad_norm": 0.06530747562646866, "learning_rate": 5.31239756496896e-06, "loss": 6.9713, "step": 32410 }, { "epoch": 0.22244640605296342, "grad_norm": 0.05792338028550148, "learning_rate": 5.298190115605924e-06, "loss": 6.9762, "step": 32420 }, { "epoch": 0.2226986128625473, "grad_norm": 0.05927807837724686, "learning_rate": 5.283999849537302e-06, "loss": 6.9729, "step": 32430 }, { "epoch": 0.22295081967213115, "grad_norm": 0.05997258052229881, "learning_rate": 5.269826776634234e-06, "loss": 6.9765, "step": 32440 }, { "epoch": 0.223203026481715, "grad_norm": 0.06213447451591492, "learning_rate": 5.255670906755945e-06, "loss": 6.9782, "step": 32450 }, { "epoch": 0.22345523329129888, "grad_norm": 0.06689543277025223, "learning_rate": 5.241532249749636e-06, "loss": 6.9789, "step": 32460 }, { "epoch": 0.22370744010088273, "grad_norm": 0.06065576523542404, "learning_rate": 5.2274108154505886e-06, "loss": 6.9771, "step": 32470 }, { "epoch": 0.22395964691046658, "grad_norm": 0.07058634608983994, "learning_rate": 5.213306613682079e-06, "loss": 6.9788, "step": 32480 }, { "epoch": 0.22421185372005045, "grad_norm": 0.0643412247300148, "learning_rate": 5.199219654255385e-06, "loss": 6.9811, "step": 32490 }, { "epoch": 0.2244640605296343, "grad_norm": 0.06001817435026169, "learning_rate": 5.1851499469698225e-06, "loss": 6.9759, "step": 32500 }, { "epoch": 0.22471626733921815, "grad_norm": 0.05590248852968216, "learning_rate": 5.171097501612677e-06, "loss": 6.9737, "step": 32510 }, { "epoch": 0.22496847414880203, "grad_norm": 0.05540397763252258, "learning_rate": 5.157062327959239e-06, "loss": 6.9706, "step": 32520 }, { "epoch": 0.22522068095838588, "grad_norm": 0.06513455510139465, "learning_rate": 5.1430444357727745e-06, "loss": 6.9741, "step": 32530 }, { "epoch": 0.22547288776796973, "grad_norm": 0.05845537409186363, "learning_rate": 5.129043834804547e-06, "loss": 6.979, "step": 32540 }, { "epoch": 0.2257250945775536, "grad_norm": 0.06104734167456627, "learning_rate": 5.115060534793774e-06, "loss": 6.9784, "step": 32550 }, { "epoch": 0.22597730138713745, "grad_norm": 0.06675967574119568, "learning_rate": 5.101094545467646e-06, "loss": 6.9769, "step": 32560 }, { "epoch": 0.2262295081967213, "grad_norm": 0.06385082751512527, "learning_rate": 5.087145876541302e-06, "loss": 6.9785, "step": 32570 }, { "epoch": 0.22648171500630518, "grad_norm": 0.06353133171796799, "learning_rate": 5.073214537717852e-06, "loss": 6.9777, "step": 32580 }, { "epoch": 0.22673392181588903, "grad_norm": 0.06621357798576355, "learning_rate": 5.0593005386883354e-06, "loss": 6.971, "step": 32590 }, { "epoch": 0.22698612862547288, "grad_norm": 0.06366706639528275, "learning_rate": 5.0454038891317204e-06, "loss": 6.973, "step": 32600 }, { "epoch": 0.22723833543505675, "grad_norm": 0.05688956379890442, "learning_rate": 5.031524598714947e-06, "loss": 6.9769, "step": 32610 }, { "epoch": 0.2274905422446406, "grad_norm": 0.05646675452589989, "learning_rate": 5.017662677092817e-06, "loss": 6.9713, "step": 32620 }, { "epoch": 0.22774274905422445, "grad_norm": 0.06686577200889587, "learning_rate": 5.003818133908109e-06, "loss": 6.9801, "step": 32630 }, { "epoch": 0.22799495586380833, "grad_norm": 0.048255737870931625, "learning_rate": 4.989990978791473e-06, "loss": 6.9719, "step": 32640 }, { "epoch": 0.22824716267339218, "grad_norm": 0.05497612804174423, "learning_rate": 4.976181221361492e-06, "loss": 6.9714, "step": 32650 }, { "epoch": 0.22849936948297603, "grad_norm": 0.056689150631427765, "learning_rate": 4.9623888712246315e-06, "loss": 6.9713, "step": 32660 }, { "epoch": 0.2287515762925599, "grad_norm": 0.06363890320062637, "learning_rate": 4.948613937975243e-06, "loss": 6.969, "step": 32670 }, { "epoch": 0.22900378310214375, "grad_norm": 0.06620294600725174, "learning_rate": 4.934856431195565e-06, "loss": 6.9792, "step": 32680 }, { "epoch": 0.2292559899117276, "grad_norm": 0.07021557539701462, "learning_rate": 4.921116360455732e-06, "loss": 6.9812, "step": 32690 }, { "epoch": 0.22950819672131148, "grad_norm": 0.07016345858573914, "learning_rate": 4.907393735313719e-06, "loss": 6.982, "step": 32700 }, { "epoch": 0.22976040353089533, "grad_norm": 0.06507381051778793, "learning_rate": 4.8936885653154064e-06, "loss": 6.9809, "step": 32710 }, { "epoch": 0.2300126103404792, "grad_norm": 0.05193006247282028, "learning_rate": 4.880000859994482e-06, "loss": 6.9761, "step": 32720 }, { "epoch": 0.23026481715006306, "grad_norm": 0.05827578902244568, "learning_rate": 4.866330628872514e-06, "loss": 6.9692, "step": 32730 }, { "epoch": 0.2305170239596469, "grad_norm": 0.04403829202055931, "learning_rate": 4.8526778814589214e-06, "loss": 6.9647, "step": 32740 }, { "epoch": 0.23076923076923078, "grad_norm": 0.05187278985977173, "learning_rate": 4.839042627250939e-06, "loss": 6.9638, "step": 32750 }, { "epoch": 0.23102143757881463, "grad_norm": 0.06694944947957993, "learning_rate": 4.825424875733657e-06, "loss": 6.9715, "step": 32760 }, { "epoch": 0.23127364438839848, "grad_norm": 0.06352733820676804, "learning_rate": 4.811824636379974e-06, "loss": 6.9787, "step": 32770 }, { "epoch": 0.23152585119798236, "grad_norm": 0.05405212193727493, "learning_rate": 4.79824191865061e-06, "loss": 6.9775, "step": 32780 }, { "epoch": 0.2317780580075662, "grad_norm": 0.056408971548080444, "learning_rate": 4.784676731994085e-06, "loss": 6.9695, "step": 32790 }, { "epoch": 0.23203026481715006, "grad_norm": 0.04563460126519203, "learning_rate": 4.771129085846753e-06, "loss": 6.9643, "step": 32800 }, { "epoch": 0.23228247162673393, "grad_norm": 0.05161970481276512, "learning_rate": 4.757598989632743e-06, "loss": 6.9662, "step": 32810 }, { "epoch": 0.23253467843631778, "grad_norm": 0.06474154442548752, "learning_rate": 4.744086452763978e-06, "loss": 6.9723, "step": 32820 }, { "epoch": 0.23278688524590163, "grad_norm": 0.0648968517780304, "learning_rate": 4.730591484640176e-06, "loss": 6.978, "step": 32830 }, { "epoch": 0.2330390920554855, "grad_norm": 0.06227610260248184, "learning_rate": 4.717114094648816e-06, "loss": 6.979, "step": 32840 }, { "epoch": 0.23329129886506936, "grad_norm": 0.06490001082420349, "learning_rate": 4.703654292165176e-06, "loss": 6.9815, "step": 32850 }, { "epoch": 0.2335435056746532, "grad_norm": 0.061628371477127075, "learning_rate": 4.69021208655227e-06, "loss": 6.977, "step": 32860 }, { "epoch": 0.23379571248423708, "grad_norm": 0.05921626836061478, "learning_rate": 4.6767874871609116e-06, "loss": 6.975, "step": 32870 }, { "epoch": 0.23404791929382093, "grad_norm": 0.0518091656267643, "learning_rate": 4.663380503329604e-06, "loss": 6.9662, "step": 32880 }, { "epoch": 0.23430012610340478, "grad_norm": 0.05346446856856346, "learning_rate": 4.64999114438466e-06, "loss": 6.9673, "step": 32890 }, { "epoch": 0.23455233291298866, "grad_norm": 0.054455485194921494, "learning_rate": 4.636619419640093e-06, "loss": 6.9709, "step": 32900 }, { "epoch": 0.2348045397225725, "grad_norm": 0.05594867840409279, "learning_rate": 4.623265338397671e-06, "loss": 6.9708, "step": 32910 }, { "epoch": 0.23505674653215636, "grad_norm": 0.056904133409261703, "learning_rate": 4.609928909946875e-06, "loss": 6.9755, "step": 32920 }, { "epoch": 0.23530895334174023, "grad_norm": 0.064055435359478, "learning_rate": 4.596610143564912e-06, "loss": 6.9788, "step": 32930 }, { "epoch": 0.23556116015132408, "grad_norm": 0.05098586902022362, "learning_rate": 4.583309048516693e-06, "loss": 6.9732, "step": 32940 }, { "epoch": 0.23581336696090793, "grad_norm": 0.04724693298339844, "learning_rate": 4.570025634054862e-06, "loss": 6.9646, "step": 32950 }, { "epoch": 0.2360655737704918, "grad_norm": 0.05247405916452408, "learning_rate": 4.556759909419737e-06, "loss": 6.9638, "step": 32960 }, { "epoch": 0.23631778058007566, "grad_norm": 0.05906549468636513, "learning_rate": 4.543511883839338e-06, "loss": 6.9717, "step": 32970 }, { "epoch": 0.2365699873896595, "grad_norm": 0.06851393729448318, "learning_rate": 4.530281566529383e-06, "loss": 6.9795, "step": 32980 }, { "epoch": 0.23682219419924339, "grad_norm": 0.06427455693483353, "learning_rate": 4.517068966693249e-06, "loss": 6.9789, "step": 32990 }, { "epoch": 0.23707440100882723, "grad_norm": 0.05964810401201248, "learning_rate": 4.50387409352202e-06, "loss": 6.9739, "step": 33000 }, { "epoch": 0.23732660781841108, "grad_norm": 0.06541550904512405, "learning_rate": 4.490696956194419e-06, "loss": 6.9729, "step": 33010 }, { "epoch": 0.23757881462799496, "grad_norm": 0.05566445738077164, "learning_rate": 4.477537563876856e-06, "loss": 6.9741, "step": 33020 }, { "epoch": 0.2378310214375788, "grad_norm": 0.06088533252477646, "learning_rate": 4.464395925723381e-06, "loss": 6.9769, "step": 33030 }, { "epoch": 0.2380832282471627, "grad_norm": 0.06322471052408218, "learning_rate": 4.4512720508757e-06, "loss": 6.9778, "step": 33040 }, { "epoch": 0.23833543505674654, "grad_norm": 0.06620410829782486, "learning_rate": 4.438165948463149e-06, "loss": 6.9792, "step": 33050 }, { "epoch": 0.23858764186633039, "grad_norm": 0.0622081495821476, "learning_rate": 4.4250776276027245e-06, "loss": 6.9785, "step": 33060 }, { "epoch": 0.23883984867591426, "grad_norm": 0.058299772441387177, "learning_rate": 4.412007097399042e-06, "loss": 6.9767, "step": 33070 }, { "epoch": 0.2390920554854981, "grad_norm": 0.0360674150288105, "learning_rate": 4.398954366944335e-06, "loss": 6.9613, "step": 33080 }, { "epoch": 0.23934426229508196, "grad_norm": 0.0539420023560524, "learning_rate": 4.385919445318465e-06, "loss": 6.9637, "step": 33090 }, { "epoch": 0.23959646910466584, "grad_norm": 0.06727101653814316, "learning_rate": 4.37290234158889e-06, "loss": 6.9721, "step": 33100 }, { "epoch": 0.2398486759142497, "grad_norm": 0.0663459300994873, "learning_rate": 4.359903064810697e-06, "loss": 6.9781, "step": 33110 }, { "epoch": 0.24010088272383354, "grad_norm": 0.06235098838806152, "learning_rate": 4.346921624026553e-06, "loss": 6.9792, "step": 33120 }, { "epoch": 0.2403530895334174, "grad_norm": 0.05946678668260574, "learning_rate": 4.33395802826674e-06, "loss": 6.9803, "step": 33130 }, { "epoch": 0.24060529634300126, "grad_norm": 0.05965130776166916, "learning_rate": 4.321012286549085e-06, "loss": 6.9729, "step": 33140 }, { "epoch": 0.2408575031525851, "grad_norm": 0.043022409081459045, "learning_rate": 4.308084407879038e-06, "loss": 6.9662, "step": 33150 }, { "epoch": 0.241109709962169, "grad_norm": 0.06014874577522278, "learning_rate": 4.295174401249595e-06, "loss": 6.9641, "step": 33160 }, { "epoch": 0.24136191677175284, "grad_norm": 0.05000138282775879, "learning_rate": 4.282282275641343e-06, "loss": 6.9735, "step": 33170 }, { "epoch": 0.2416141235813367, "grad_norm": 0.05636114627122879, "learning_rate": 4.269408040022413e-06, "loss": 6.9704, "step": 33180 }, { "epoch": 0.24186633039092056, "grad_norm": 0.05938113480806351, "learning_rate": 4.256551703348494e-06, "loss": 6.9763, "step": 33190 }, { "epoch": 0.2421185372005044, "grad_norm": 0.05613280087709427, "learning_rate": 4.243713274562829e-06, "loss": 6.9749, "step": 33200 }, { "epoch": 0.24237074401008826, "grad_norm": 0.058121852576732635, "learning_rate": 4.23089276259619e-06, "loss": 6.9735, "step": 33210 }, { "epoch": 0.24262295081967214, "grad_norm": 0.05618899315595627, "learning_rate": 4.218090176366912e-06, "loss": 6.9724, "step": 33220 }, { "epoch": 0.242875157629256, "grad_norm": 0.06446004658937454, "learning_rate": 4.205305524780841e-06, "loss": 6.9742, "step": 33230 }, { "epoch": 0.24312736443883984, "grad_norm": 0.057068075984716415, "learning_rate": 4.192538816731344e-06, "loss": 6.9729, "step": 33240 }, { "epoch": 0.24337957124842372, "grad_norm": 0.059671081602573395, "learning_rate": 4.1797900610993125e-06, "loss": 6.9732, "step": 33250 }, { "epoch": 0.24363177805800756, "grad_norm": 0.06215619295835495, "learning_rate": 4.167059266753163e-06, "loss": 6.9745, "step": 33260 }, { "epoch": 0.2438839848675914, "grad_norm": 0.056672386825084686, "learning_rate": 4.1543464425487874e-06, "loss": 6.9747, "step": 33270 }, { "epoch": 0.2441361916771753, "grad_norm": 0.0481967069208622, "learning_rate": 4.1416515973296095e-06, "loss": 6.9646, "step": 33280 }, { "epoch": 0.24438839848675914, "grad_norm": 0.05611860752105713, "learning_rate": 4.128974739926528e-06, "loss": 6.9696, "step": 33290 }, { "epoch": 0.244640605296343, "grad_norm": 0.05770725756883621, "learning_rate": 4.116315879157927e-06, "loss": 6.9736, "step": 33300 }, { "epoch": 0.24489281210592687, "grad_norm": 0.05984688177704811, "learning_rate": 4.103675023829674e-06, "loss": 6.9765, "step": 33310 }, { "epoch": 0.24514501891551072, "grad_norm": 0.05984925851225853, "learning_rate": 4.0910521827351256e-06, "loss": 6.9756, "step": 33320 }, { "epoch": 0.24539722572509456, "grad_norm": 0.06079718843102455, "learning_rate": 4.078447364655088e-06, "loss": 6.9756, "step": 33330 }, { "epoch": 0.24564943253467844, "grad_norm": 0.06651121377944946, "learning_rate": 4.06586057835784e-06, "loss": 6.9812, "step": 33340 }, { "epoch": 0.2459016393442623, "grad_norm": 0.05891082063317299, "learning_rate": 4.053291832599116e-06, "loss": 6.9751, "step": 33350 }, { "epoch": 0.24615384615384617, "grad_norm": 0.05431085824966431, "learning_rate": 4.040741136122088e-06, "loss": 6.9729, "step": 33360 }, { "epoch": 0.24640605296343002, "grad_norm": 0.057402245700359344, "learning_rate": 4.028208497657399e-06, "loss": 6.9734, "step": 33370 }, { "epoch": 0.24665825977301387, "grad_norm": 0.05512393265962601, "learning_rate": 4.01569392592311e-06, "loss": 6.9691, "step": 33380 }, { "epoch": 0.24691046658259774, "grad_norm": 0.057502925395965576, "learning_rate": 4.003197429624721e-06, "loss": 6.9735, "step": 33390 }, { "epoch": 0.2471626733921816, "grad_norm": 0.06348613649606705, "learning_rate": 3.990719017455146e-06, "loss": 6.9784, "step": 33400 }, { "epoch": 0.24741488020176544, "grad_norm": 0.048866599798202515, "learning_rate": 3.978258698094748e-06, "loss": 6.9738, "step": 33410 }, { "epoch": 0.24766708701134932, "grad_norm": 0.054177846759557724, "learning_rate": 3.965816480211268e-06, "loss": 6.9708, "step": 33420 }, { "epoch": 0.24791929382093317, "grad_norm": 0.048065271228551865, "learning_rate": 3.953392372459887e-06, "loss": 6.9635, "step": 33430 }, { "epoch": 0.24817150063051702, "grad_norm": 0.05518287047743797, "learning_rate": 3.940986383483172e-06, "loss": 6.9673, "step": 33440 }, { "epoch": 0.2484237074401009, "grad_norm": 0.052498094737529755, "learning_rate": 3.928598521911086e-06, "loss": 6.9687, "step": 33450 }, { "epoch": 0.00025220680958385876, "grad_norm": 0.057390935719013214, "learning_rate": 3.916228796360982e-06, "loss": 6.9765, "step": 33460 }, { "epoch": 0.0005044136191677175, "grad_norm": 0.0614200197160244, "learning_rate": 3.903877215437596e-06, "loss": 6.976, "step": 33470 }, { "epoch": 0.0007566204287515763, "grad_norm": 0.06430237740278244, "learning_rate": 3.891543787733054e-06, "loss": 6.977, "step": 33480 }, { "epoch": 0.001008827238335435, "grad_norm": 0.048947155475616455, "learning_rate": 3.879228521826844e-06, "loss": 6.9644, "step": 33490 }, { "epoch": 0.0012610340479192938, "grad_norm": 0.04916370287537575, "learning_rate": 3.86693142628582e-06, "loss": 6.9646, "step": 33500 }, { "epoch": 0.0015132408575031526, "grad_norm": 0.055712755769491196, "learning_rate": 3.854652509664189e-06, "loss": 6.9697, "step": 33510 }, { "epoch": 0.0017654476670870113, "grad_norm": 0.04896046966314316, "learning_rate": 3.842391780503539e-06, "loss": 6.9694, "step": 33520 }, { "epoch": 0.00201765447667087, "grad_norm": 0.07103961706161499, "learning_rate": 3.83014924733277e-06, "loss": 6.9753, "step": 33530 }, { "epoch": 0.002269861286254729, "grad_norm": 0.054941244423389435, "learning_rate": 3.817924918668168e-06, "loss": 6.9773, "step": 33540 }, { "epoch": 0.0025220680958385876, "grad_norm": 0.06354881078004837, "learning_rate": 3.805718803013295e-06, "loss": 6.9746, "step": 33550 }, { "epoch": 0.0027742749054224464, "grad_norm": 0.06172466278076172, "learning_rate": 3.7935309088591075e-06, "loss": 6.9779, "step": 33560 }, { "epoch": 0.003026481715006305, "grad_norm": 0.06377216428518295, "learning_rate": 3.7813612446838465e-06, "loss": 6.9761, "step": 33570 }, { "epoch": 0.003278688524590164, "grad_norm": 0.06148877739906311, "learning_rate": 3.76920981895308e-06, "loss": 6.9755, "step": 33580 }, { "epoch": 0.0035308953341740227, "grad_norm": 0.06515023857355118, "learning_rate": 3.7570766401196967e-06, "loss": 6.9778, "step": 33590 }, { "epoch": 0.0037831021437578815, "grad_norm": 0.06696447730064392, "learning_rate": 3.7449617166238882e-06, "loss": 6.9788, "step": 33600 }, { "epoch": 0.00403530895334174, "grad_norm": 0.047485217452049255, "learning_rate": 3.7328650568931464e-06, "loss": 6.9683, "step": 33610 }, { "epoch": 0.004287515762925599, "grad_norm": 0.04665369540452957, "learning_rate": 3.7207866693422477e-06, "loss": 6.9603, "step": 33620 }, { "epoch": 0.004539722572509458, "grad_norm": 0.05122684687376022, "learning_rate": 3.7087265623732804e-06, "loss": 6.9721, "step": 33630 }, { "epoch": 0.0047919293820933165, "grad_norm": 0.05702594667673111, "learning_rate": 3.6966847443756e-06, "loss": 6.9691, "step": 33640 }, { "epoch": 0.005044136191677175, "grad_norm": 0.051431890577077866, "learning_rate": 3.684661223725847e-06, "loss": 6.9704, "step": 33650 }, { "epoch": 0.005296343001261034, "grad_norm": 0.06723734736442566, "learning_rate": 3.6726560087879203e-06, "loss": 6.9724, "step": 33660 }, { "epoch": 0.005548549810844893, "grad_norm": 0.05904792994260788, "learning_rate": 3.6606691079130094e-06, "loss": 6.9779, "step": 33670 }, { "epoch": 0.005800756620428752, "grad_norm": 0.054372016340494156, "learning_rate": 3.6487005294395347e-06, "loss": 6.9715, "step": 33680 }, { "epoch": 0.00605296343001261, "grad_norm": 0.057941921055316925, "learning_rate": 3.6367502816931995e-06, "loss": 6.9714, "step": 33690 }, { "epoch": 0.006305170239596469, "grad_norm": 0.04663163051009178, "learning_rate": 3.6248183729869443e-06, "loss": 6.9658, "step": 33700 }, { "epoch": 0.006557377049180328, "grad_norm": 0.04829082265496254, "learning_rate": 3.6129048116209327e-06, "loss": 6.9663, "step": 33710 }, { "epoch": 0.006809583858764187, "grad_norm": 0.05311236158013344, "learning_rate": 3.601009605882596e-06, "loss": 6.9697, "step": 33720 }, { "epoch": 0.007061790668348045, "grad_norm": 0.041238877922296524, "learning_rate": 3.589132764046578e-06, "loss": 6.9645, "step": 33730 }, { "epoch": 0.007313997477931904, "grad_norm": 0.0460687130689621, "learning_rate": 3.5772742943747593e-06, "loss": 6.962, "step": 33740 }, { "epoch": 0.007566204287515763, "grad_norm": 0.0641811192035675, "learning_rate": 3.565434205116236e-06, "loss": 6.9788, "step": 33750 }, { "epoch": 0.007818411097099623, "grad_norm": 0.06326179951429367, "learning_rate": 3.553612504507312e-06, "loss": 6.9757, "step": 33760 }, { "epoch": 0.00807061790668348, "grad_norm": 0.06292463093996048, "learning_rate": 3.5418092007714997e-06, "loss": 6.9736, "step": 33770 }, { "epoch": 0.00832282471626734, "grad_norm": 0.06882239133119583, "learning_rate": 3.530024302119531e-06, "loss": 6.977, "step": 33780 }, { "epoch": 0.008575031525851198, "grad_norm": 0.06909830123186111, "learning_rate": 3.518257816749315e-06, "loss": 6.9803, "step": 33790 }, { "epoch": 0.008827238335435058, "grad_norm": 0.05347898602485657, "learning_rate": 3.5065097528459712e-06, "loss": 6.9795, "step": 33800 }, { "epoch": 0.009079445145018916, "grad_norm": 0.042629364877939224, "learning_rate": 3.494780118581772e-06, "loss": 6.9648, "step": 33810 }, { "epoch": 0.009331651954602775, "grad_norm": 0.05127555504441261, "learning_rate": 3.4830689221162102e-06, "loss": 6.9679, "step": 33820 }, { "epoch": 0.009583858764186633, "grad_norm": 0.05741863697767258, "learning_rate": 3.4713761715959237e-06, "loss": 6.9701, "step": 33830 }, { "epoch": 0.009836065573770493, "grad_norm": 0.055692579597234726, "learning_rate": 3.4597018751547248e-06, "loss": 6.9798, "step": 33840 }, { "epoch": 0.01008827238335435, "grad_norm": 0.0677981898188591, "learning_rate": 3.448046040913605e-06, "loss": 6.977, "step": 33850 }, { "epoch": 0.01034047919293821, "grad_norm": 0.05008483678102493, "learning_rate": 3.4364086769806947e-06, "loss": 6.9691, "step": 33860 }, { "epoch": 0.010592686002522068, "grad_norm": 0.04905078187584877, "learning_rate": 3.424789791451278e-06, "loss": 6.9637, "step": 33870 }, { "epoch": 0.010844892812105928, "grad_norm": 0.057890377938747406, "learning_rate": 3.4131893924077806e-06, "loss": 6.97, "step": 33880 }, { "epoch": 0.011097099621689786, "grad_norm": 0.06040763854980469, "learning_rate": 3.401607487919797e-06, "loss": 6.9757, "step": 33890 }, { "epoch": 0.011349306431273645, "grad_norm": 0.06315512210130692, "learning_rate": 3.3900440860440218e-06, "loss": 6.9769, "step": 33900 }, { "epoch": 0.011601513240857503, "grad_norm": 0.04006970301270485, "learning_rate": 3.3784991948242916e-06, "loss": 6.9657, "step": 33910 }, { "epoch": 0.011853720050441363, "grad_norm": 0.03263578191399574, "learning_rate": 3.3669728222915653e-06, "loss": 6.9528, "step": 33920 }, { "epoch": 0.01210592686002522, "grad_norm": 0.05675864219665527, "learning_rate": 3.355464976463932e-06, "loss": 6.9709, "step": 33930 }, { "epoch": 0.01235813366960908, "grad_norm": 0.06576443463563919, "learning_rate": 3.343975665346579e-06, "loss": 6.9732, "step": 33940 }, { "epoch": 0.012610340479192938, "grad_norm": 0.04899245500564575, "learning_rate": 3.332504896931793e-06, "loss": 6.9662, "step": 33950 }, { "epoch": 0.012862547288776798, "grad_norm": 0.046789105981588364, "learning_rate": 3.3210526791989937e-06, "loss": 6.965, "step": 33960 }, { "epoch": 0.013114754098360656, "grad_norm": 0.06253122538328171, "learning_rate": 3.3096190201146493e-06, "loss": 6.9674, "step": 33970 }, { "epoch": 0.013366960907944515, "grad_norm": 0.06416606903076172, "learning_rate": 3.2982039276323673e-06, "loss": 6.9776, "step": 33980 }, { "epoch": 0.013619167717528373, "grad_norm": 0.06268078833818436, "learning_rate": 3.286807409692796e-06, "loss": 6.9801, "step": 33990 }, { "epoch": 0.013871374527112233, "grad_norm": 0.055605147033929825, "learning_rate": 3.2754294742237035e-06, "loss": 6.9819, "step": 34000 }, { "epoch": 0.01412358133669609, "grad_norm": 0.05864763259887695, "learning_rate": 3.26520522685564e-06, "loss": 6.9727, "step": 34010 }, { "epoch": 0.01437578814627995, "grad_norm": 0.049307700246572495, "learning_rate": 3.2538626198750853e-06, "loss": 6.9699, "step": 34020 }, { "epoch": 0.014627994955863808, "grad_norm": 0.04813304916024208, "learning_rate": 3.242538618282348e-06, "loss": 6.9688, "step": 34030 }, { "epoch": 0.014880201765447668, "grad_norm": 0.07024627923965454, "learning_rate": 3.2312332299547296e-06, "loss": 6.9786, "step": 34040 }, { "epoch": 0.015132408575031526, "grad_norm": 0.05989966541528702, "learning_rate": 3.2199464627565966e-06, "loss": 6.9811, "step": 34050 }, { "epoch": 0.015384615384615385, "grad_norm": 0.05554244667291641, "learning_rate": 3.208678324539338e-06, "loss": 6.9728, "step": 34060 }, { "epoch": 0.015636822194199245, "grad_norm": 0.052751537412405014, "learning_rate": 3.1974288231414106e-06, "loss": 6.9694, "step": 34070 }, { "epoch": 0.0158890290037831, "grad_norm": 0.06469368934631348, "learning_rate": 3.186197966388291e-06, "loss": 6.9759, "step": 34080 }, { "epoch": 0.01614123581336696, "grad_norm": 0.059980928897857666, "learning_rate": 3.174985762092474e-06, "loss": 6.9813, "step": 34090 }, { "epoch": 0.01639344262295082, "grad_norm": 0.06394225358963013, "learning_rate": 3.1637922180535096e-06, "loss": 6.9778, "step": 34100 }, { "epoch": 0.01664564943253468, "grad_norm": 0.05838807672262192, "learning_rate": 3.1526173420579363e-06, "loss": 6.9724, "step": 34110 }, { "epoch": 0.016897856242118536, "grad_norm": 0.05153479799628258, "learning_rate": 3.1414611418793395e-06, "loss": 6.9739, "step": 34120 }, { "epoch": 0.017150063051702396, "grad_norm": 0.05943926423788071, "learning_rate": 3.130323625278284e-06, "loss": 6.9699, "step": 34130 }, { "epoch": 0.017402269861286256, "grad_norm": 0.06835153698921204, "learning_rate": 3.1192048000023532e-06, "loss": 6.9749, "step": 34140 }, { "epoch": 0.017654476670870115, "grad_norm": 0.06456714123487473, "learning_rate": 3.108104673786113e-06, "loss": 6.9791, "step": 34150 }, { "epoch": 0.01790668348045397, "grad_norm": 0.0634806826710701, "learning_rate": 3.0970232543511502e-06, "loss": 6.9782, "step": 34160 }, { "epoch": 0.01815889029003783, "grad_norm": 0.05961679667234421, "learning_rate": 3.085960549406005e-06, "loss": 6.9796, "step": 34170 }, { "epoch": 0.01841109709962169, "grad_norm": 0.05248037353157997, "learning_rate": 3.0749165666462355e-06, "loss": 6.9728, "step": 34180 }, { "epoch": 0.01866330390920555, "grad_norm": 0.04594135656952858, "learning_rate": 3.063891313754332e-06, "loss": 6.9685, "step": 34190 }, { "epoch": 0.018915510718789406, "grad_norm": 0.05869334563612938, "learning_rate": 3.052884798399794e-06, "loss": 6.9672, "step": 34200 }, { "epoch": 0.019167717528373266, "grad_norm": 0.05135191231966019, "learning_rate": 3.0418970282390736e-06, "loss": 6.9719, "step": 34210 }, { "epoch": 0.019419924337957126, "grad_norm": 0.06704134494066238, "learning_rate": 3.030928010915569e-06, "loss": 6.9752, "step": 34220 }, { "epoch": 0.019672131147540985, "grad_norm": 0.06330897659063339, "learning_rate": 3.0199777540596606e-06, "loss": 6.979, "step": 34230 }, { "epoch": 0.01992433795712484, "grad_norm": 0.05465685948729515, "learning_rate": 3.0090462652886586e-06, "loss": 6.9714, "step": 34240 }, { "epoch": 0.0201765447667087, "grad_norm": 0.036960478872060776, "learning_rate": 2.9981335522068267e-06, "loss": 6.963, "step": 34250 }, { "epoch": 0.02042875157629256, "grad_norm": 0.0498802624642849, "learning_rate": 2.987239622405352e-06, "loss": 6.9629, "step": 34260 }, { "epoch": 0.02068095838587642, "grad_norm": 0.05880827084183693, "learning_rate": 2.976364483462384e-06, "loss": 6.976, "step": 34270 }, { "epoch": 0.020933165195460277, "grad_norm": 0.04151790961623192, "learning_rate": 2.9655081429429733e-06, "loss": 6.9643, "step": 34280 }, { "epoch": 0.021185372005044136, "grad_norm": 0.04899163544178009, "learning_rate": 2.9546706083991126e-06, "loss": 6.9606, "step": 34290 }, { "epoch": 0.021437578814627996, "grad_norm": 0.06623511761426926, "learning_rate": 2.9438518873696917e-06, "loss": 6.9759, "step": 34300 }, { "epoch": 0.021689785624211855, "grad_norm": 0.036019060760736465, "learning_rate": 2.9330519873805405e-06, "loss": 6.9547, "step": 34310 }, { "epoch": 0.02194199243379571, "grad_norm": 0.04830363765358925, "learning_rate": 2.9222709159443795e-06, "loss": 6.9559, "step": 34320 }, { "epoch": 0.02219419924337957, "grad_norm": 0.056718163192272186, "learning_rate": 2.911508680560826e-06, "loss": 6.9692, "step": 34330 }, { "epoch": 0.02244640605296343, "grad_norm": 0.059888239949941635, "learning_rate": 2.900765288716418e-06, "loss": 6.9726, "step": 34340 }, { "epoch": 0.02269861286254729, "grad_norm": 0.05360483378171921, "learning_rate": 2.890040747884548e-06, "loss": 6.9734, "step": 34350 }, { "epoch": 0.022950819672131147, "grad_norm": 0.06399554759263992, "learning_rate": 2.8793350655255345e-06, "loss": 6.9724, "step": 34360 }, { "epoch": 0.023203026481715006, "grad_norm": 0.05429873242974281, "learning_rate": 2.8686482490865506e-06, "loss": 6.9776, "step": 34370 }, { "epoch": 0.023455233291298866, "grad_norm": 0.060303714126348495, "learning_rate": 2.8579803060016665e-06, "loss": 6.9744, "step": 34380 }, { "epoch": 0.023707440100882726, "grad_norm": 0.05685173720121384, "learning_rate": 2.8473312436918055e-06, "loss": 6.974, "step": 34390 }, { "epoch": 0.02395964691046658, "grad_norm": 0.06582388281822205, "learning_rate": 2.8367010695647654e-06, "loss": 6.9768, "step": 34400 }, { "epoch": 0.02421185372005044, "grad_norm": 0.06965703517198563, "learning_rate": 2.826089791015195e-06, "loss": 6.9793, "step": 34410 }, { "epoch": 0.0244640605296343, "grad_norm": 0.05969509482383728, "learning_rate": 2.8154974154246193e-06, "loss": 6.9746, "step": 34420 }, { "epoch": 0.02471626733921816, "grad_norm": 0.0653560534119606, "learning_rate": 2.8049239501613944e-06, "loss": 6.9758, "step": 34430 }, { "epoch": 0.024968474148802017, "grad_norm": 0.07282951474189758, "learning_rate": 2.794369402580733e-06, "loss": 6.9788, "step": 34440 }, { "epoch": 0.025220680958385876, "grad_norm": 0.04820742458105087, "learning_rate": 2.7838337800246826e-06, "loss": 6.9634, "step": 34450 }, { "epoch": 0.025472887767969736, "grad_norm": 0.06337758898735046, "learning_rate": 2.7733170898221173e-06, "loss": 6.965, "step": 34460 }, { "epoch": 0.025725094577553596, "grad_norm": 0.06335536390542984, "learning_rate": 2.762819339288768e-06, "loss": 6.9795, "step": 34470 }, { "epoch": 0.025977301387137452, "grad_norm": 0.0389319472014904, "learning_rate": 2.7523405357271635e-06, "loss": 6.9762, "step": 34480 }, { "epoch": 0.02622950819672131, "grad_norm": 0.04990828037261963, "learning_rate": 2.74188068642667e-06, "loss": 6.9625, "step": 34490 }, { "epoch": 0.02648171500630517, "grad_norm": 0.04894784837961197, "learning_rate": 2.7314397986634563e-06, "loss": 6.9648, "step": 34500 }, { "epoch": 0.02673392181588903, "grad_norm": 0.05890405923128128, "learning_rate": 2.7210178797005103e-06, "loss": 6.9688, "step": 34510 }, { "epoch": 0.026986128625472887, "grad_norm": 0.06283017247915268, "learning_rate": 2.7106149367876122e-06, "loss": 6.9766, "step": 34520 }, { "epoch": 0.027238335435056747, "grad_norm": 0.040507737547159195, "learning_rate": 2.700230977161363e-06, "loss": 6.9654, "step": 34530 }, { "epoch": 0.027490542244640606, "grad_norm": 0.051310040056705475, "learning_rate": 2.6898660080451386e-06, "loss": 6.9629, "step": 34540 }, { "epoch": 0.027742749054224466, "grad_norm": 0.06033972650766373, "learning_rate": 2.679520036649111e-06, "loss": 6.9643, "step": 34550 }, { "epoch": 0.027994955863808322, "grad_norm": 0.05506264418363571, "learning_rate": 2.669193070170234e-06, "loss": 6.9755, "step": 34560 }, { "epoch": 0.02824716267339218, "grad_norm": 0.06604225188493729, "learning_rate": 2.6588851157922566e-06, "loss": 6.9752, "step": 34570 }, { "epoch": 0.02849936948297604, "grad_norm": 0.056342653930187225, "learning_rate": 2.648596180685681e-06, "loss": 6.973, "step": 34580 }, { "epoch": 0.0287515762925599, "grad_norm": 0.06508021801710129, "learning_rate": 2.6383262720077938e-06, "loss": 6.974, "step": 34590 }, { "epoch": 0.029003783102143757, "grad_norm": 0.0648830458521843, "learning_rate": 2.628075396902635e-06, "loss": 6.9736, "step": 34600 }, { "epoch": 0.029255989911727617, "grad_norm": 0.06370043754577637, "learning_rate": 2.61784356250101e-06, "loss": 6.9775, "step": 34610 }, { "epoch": 0.029508196721311476, "grad_norm": 0.04988579452037811, "learning_rate": 2.6076307759204853e-06, "loss": 6.9681, "step": 34620 }, { "epoch": 0.029760403530895336, "grad_norm": 0.06456726789474487, "learning_rate": 2.597437044265365e-06, "loss": 6.9719, "step": 34630 }, { "epoch": 0.030012610340479192, "grad_norm": 0.06575915217399597, "learning_rate": 2.587262374626713e-06, "loss": 6.9787, "step": 34640 }, { "epoch": 0.03026481715006305, "grad_norm": 0.05827938765287399, "learning_rate": 2.577106774082315e-06, "loss": 6.9764, "step": 34650 }, { "epoch": 0.03051702395964691, "grad_norm": 0.0591464564204216, "learning_rate": 2.5669702496967063e-06, "loss": 6.9755, "step": 34660 }, { "epoch": 0.03076923076923077, "grad_norm": 0.060016900300979614, "learning_rate": 2.5568528085211395e-06, "loss": 6.9758, "step": 34670 }, { "epoch": 0.031021437578814627, "grad_norm": 0.06022990122437477, "learning_rate": 2.546754457593612e-06, "loss": 6.9758, "step": 34680 }, { "epoch": 0.03127364438839849, "grad_norm": 0.06070544570684433, "learning_rate": 2.536675203938821e-06, "loss": 6.9772, "step": 34690 }, { "epoch": 0.031525851197982346, "grad_norm": 0.04647465795278549, "learning_rate": 2.5266150545681874e-06, "loss": 6.967, "step": 34700 }, { "epoch": 0.0317780580075662, "grad_norm": 0.049124956130981445, "learning_rate": 2.5165740164798445e-06, "loss": 6.9598, "step": 34710 }, { "epoch": 0.032030264817150066, "grad_norm": 0.052303798496723175, "learning_rate": 2.5065520966586208e-06, "loss": 6.9669, "step": 34720 }, { "epoch": 0.03228247162673392, "grad_norm": 0.056430667638778687, "learning_rate": 2.49654930207607e-06, "loss": 6.972, "step": 34730 }, { "epoch": 0.03253467843631778, "grad_norm": 0.0556919239461422, "learning_rate": 2.486565639690409e-06, "loss": 6.9685, "step": 34740 }, { "epoch": 0.03278688524590164, "grad_norm": 0.059036966413259506, "learning_rate": 2.476601116446583e-06, "loss": 6.973, "step": 34750 }, { "epoch": 0.0330390920554855, "grad_norm": 0.05788617953658104, "learning_rate": 2.4666557392761803e-06, "loss": 6.9731, "step": 34760 }, { "epoch": 0.03329129886506936, "grad_norm": 0.06165916472673416, "learning_rate": 2.45672951509751e-06, "loss": 6.976, "step": 34770 }, { "epoch": 0.033543505674653216, "grad_norm": 0.06544417887926102, "learning_rate": 2.446822450815526e-06, "loss": 6.9781, "step": 34780 }, { "epoch": 0.03379571248423707, "grad_norm": 0.0613766647875309, "learning_rate": 2.436934553321888e-06, "loss": 6.9745, "step": 34790 }, { "epoch": 0.034047919293820936, "grad_norm": 0.0646723285317421, "learning_rate": 2.427065829494891e-06, "loss": 6.9762, "step": 34800 }, { "epoch": 0.03430012610340479, "grad_norm": 0.05406802520155907, "learning_rate": 2.417216286199507e-06, "loss": 6.9748, "step": 34810 }, { "epoch": 0.03455233291298865, "grad_norm": 0.05992376059293747, "learning_rate": 2.4073859302873657e-06, "loss": 6.9721, "step": 34820 }, { "epoch": 0.03480453972257251, "grad_norm": 0.05282711610198021, "learning_rate": 2.39757476859674e-06, "loss": 6.9704, "step": 34830 }, { "epoch": 0.03505674653215637, "grad_norm": 0.06412170082330704, "learning_rate": 2.387782807952573e-06, "loss": 6.9691, "step": 34840 }, { "epoch": 0.03530895334174023, "grad_norm": 0.050077565014362335, "learning_rate": 2.3780100551664243e-06, "loss": 6.9715, "step": 34850 }, { "epoch": 0.03556116015132409, "grad_norm": 0.05804145336151123, "learning_rate": 2.368256517036508e-06, "loss": 6.9705, "step": 34860 }, { "epoch": 0.03581336696090794, "grad_norm": 0.05785294994711876, "learning_rate": 2.3585222003476605e-06, "loss": 6.9705, "step": 34870 }, { "epoch": 0.036065573770491806, "grad_norm": 0.06273018568754196, "learning_rate": 2.348807111871368e-06, "loss": 6.974, "step": 34880 }, { "epoch": 0.03631778058007566, "grad_norm": 0.061540842056274414, "learning_rate": 2.3391112583657126e-06, "loss": 6.9697, "step": 34890 }, { "epoch": 0.03656998738965952, "grad_norm": 0.05590066686272621, "learning_rate": 2.3294346465754244e-06, "loss": 6.9736, "step": 34900 }, { "epoch": 0.03682219419924338, "grad_norm": 0.04519886150956154, "learning_rate": 2.3197772832318296e-06, "loss": 6.9705, "step": 34910 }, { "epoch": 0.03707440100882724, "grad_norm": 0.047319382429122925, "learning_rate": 2.3101391750528676e-06, "loss": 6.9619, "step": 34920 }, { "epoch": 0.0373266078184111, "grad_norm": 0.0518304742872715, "learning_rate": 2.3005203287430854e-06, "loss": 6.9642, "step": 34930 }, { "epoch": 0.03757881462799496, "grad_norm": 0.05875634402036667, "learning_rate": 2.2909207509936336e-06, "loss": 6.9772, "step": 34940 }, { "epoch": 0.03783102143757881, "grad_norm": 0.055076900869607925, "learning_rate": 2.2813404484822576e-06, "loss": 6.9747, "step": 34950 }, { "epoch": 0.038083228247162676, "grad_norm": 0.06349330395460129, "learning_rate": 2.2717794278732897e-06, "loss": 6.9748, "step": 34960 }, { "epoch": 0.03833543505674653, "grad_norm": 0.06125611066818237, "learning_rate": 2.262237695817656e-06, "loss": 6.9766, "step": 34970 }, { "epoch": 0.03858764186633039, "grad_norm": 0.04638680815696716, "learning_rate": 2.252715258952854e-06, "loss": 6.9614, "step": 34980 }, { "epoch": 0.03883984867591425, "grad_norm": 0.058373868465423584, "learning_rate": 2.243212123902979e-06, "loss": 6.9627, "step": 34990 }, { "epoch": 0.03909205548549811, "grad_norm": 0.061816755682229996, "learning_rate": 2.2337282972786755e-06, "loss": 6.9779, "step": 35000 }, { "epoch": 0.03934426229508197, "grad_norm": 0.05358228459954262, "learning_rate": 2.224263785677183e-06, "loss": 6.9739, "step": 35010 }, { "epoch": 0.03959646910466583, "grad_norm": 0.05829599127173424, "learning_rate": 2.2148185956822687e-06, "loss": 6.9782, "step": 35020 }, { "epoch": 0.03984867591424968, "grad_norm": 0.05381446331739426, "learning_rate": 2.2053927338642966e-06, "loss": 6.9712, "step": 35030 }, { "epoch": 0.040100882723833546, "grad_norm": 0.060615845024585724, "learning_rate": 2.1959862067801618e-06, "loss": 6.9719, "step": 35040 }, { "epoch": 0.0403530895334174, "grad_norm": 0.06600609421730042, "learning_rate": 2.1865990209733232e-06, "loss": 6.9746, "step": 35050 }, { "epoch": 0.04060529634300126, "grad_norm": 0.06068098172545433, "learning_rate": 2.1772311829737744e-06, "loss": 6.9731, "step": 35060 }, { "epoch": 0.04085750315258512, "grad_norm": 0.03742655739188194, "learning_rate": 2.167882699298056e-06, "loss": 6.966, "step": 35070 }, { "epoch": 0.04110970996216898, "grad_norm": 0.04975556954741478, "learning_rate": 2.1585535764492427e-06, "loss": 6.9566, "step": 35080 }, { "epoch": 0.04136191677175284, "grad_norm": 0.06274020671844482, "learning_rate": 2.1492438209169397e-06, "loss": 6.9759, "step": 35090 }, { "epoch": 0.0416141235813367, "grad_norm": 0.053707245737314224, "learning_rate": 2.13995343917729e-06, "loss": 6.9729, "step": 35100 }, { "epoch": 0.04186633039092055, "grad_norm": 0.06722044199705124, "learning_rate": 2.1306824376929512e-06, "loss": 6.9765, "step": 35110 }, { "epoch": 0.042118537200504416, "grad_norm": 0.06326141953468323, "learning_rate": 2.1214308229130976e-06, "loss": 6.979, "step": 35120 }, { "epoch": 0.04237074401008827, "grad_norm": 0.0622180737555027, "learning_rate": 2.112198601273414e-06, "loss": 6.9781, "step": 35130 }, { "epoch": 0.04262295081967213, "grad_norm": 0.0651743933558464, "learning_rate": 2.102985779196117e-06, "loss": 6.9787, "step": 35140 }, { "epoch": 0.04287515762925599, "grad_norm": 0.06355565786361694, "learning_rate": 2.0937923630898936e-06, "loss": 6.9793, "step": 35150 }, { "epoch": 0.04312736443883985, "grad_norm": 0.05321398377418518, "learning_rate": 2.084618359349968e-06, "loss": 6.9753, "step": 35160 }, { "epoch": 0.04337957124842371, "grad_norm": 0.05497002229094505, "learning_rate": 2.0754637743580353e-06, "loss": 6.9711, "step": 35170 }, { "epoch": 0.04363177805800757, "grad_norm": 0.05345142260193825, "learning_rate": 2.066328614482288e-06, "loss": 6.9745, "step": 35180 }, { "epoch": 0.04388398486759142, "grad_norm": 0.04815535247325897, "learning_rate": 2.05721288607741e-06, "loss": 6.9674, "step": 35190 }, { "epoch": 0.044136191677175286, "grad_norm": 0.05364814028143883, "learning_rate": 2.0481165954845614e-06, "loss": 6.9694, "step": 35200 }, { "epoch": 0.04438839848675914, "grad_norm": 0.05679711699485779, "learning_rate": 2.039039749031392e-06, "loss": 6.9713, "step": 35210 }, { "epoch": 0.044640605296343, "grad_norm": 0.049549274146556854, "learning_rate": 2.0299823530320194e-06, "loss": 6.9737, "step": 35220 }, { "epoch": 0.04489281210592686, "grad_norm": 0.04373299703001976, "learning_rate": 2.020944413787028e-06, "loss": 6.9618, "step": 35230 }, { "epoch": 0.04514501891551072, "grad_norm": 0.04485857114195824, "learning_rate": 2.0119259375834652e-06, "loss": 6.9667, "step": 35240 }, { "epoch": 0.04539722572509458, "grad_norm": 0.061490897089242935, "learning_rate": 2.002926930694853e-06, "loss": 6.9709, "step": 35250 }, { "epoch": 0.04564943253467844, "grad_norm": 0.06668835133314133, "learning_rate": 1.993947399381153e-06, "loss": 6.9792, "step": 35260 }, { "epoch": 0.04590163934426229, "grad_norm": 0.05552633851766586, "learning_rate": 1.9849873498887995e-06, "loss": 6.9756, "step": 35270 }, { "epoch": 0.046153846153846156, "grad_norm": 0.06582916527986526, "learning_rate": 1.976046788450646e-06, "loss": 6.9746, "step": 35280 }, { "epoch": 0.04640605296343001, "grad_norm": 0.06618969887495041, "learning_rate": 1.9671257212860183e-06, "loss": 6.9803, "step": 35290 }, { "epoch": 0.04665825977301387, "grad_norm": 0.062262170016765594, "learning_rate": 1.958224154600662e-06, "loss": 6.9796, "step": 35300 }, { "epoch": 0.04691046658259773, "grad_norm": 0.03535833954811096, "learning_rate": 1.9493420945867733e-06, "loss": 6.9698, "step": 35310 }, { "epoch": 0.04716267339218159, "grad_norm": 0.028665021061897278, "learning_rate": 1.940479547422964e-06, "loss": 6.9503, "step": 35320 }, { "epoch": 0.04741488020176545, "grad_norm": 0.059936974197626114, "learning_rate": 1.931636519274279e-06, "loss": 6.9606, "step": 35330 }, { "epoch": 0.04766708701134931, "grad_norm": 0.04452815651893616, "learning_rate": 1.9228130162921876e-06, "loss": 6.9614, "step": 35340 }, { "epoch": 0.04791929382093316, "grad_norm": 0.04111568629741669, "learning_rate": 1.914009044614563e-06, "loss": 6.9607, "step": 35350 }, { "epoch": 0.048171500630517027, "grad_norm": 0.05910142511129379, "learning_rate": 1.9052246103657201e-06, "loss": 6.9711, "step": 35360 }, { "epoch": 0.04842370744010088, "grad_norm": 0.06413992494344711, "learning_rate": 1.8964597196563572e-06, "loss": 6.9795, "step": 35370 }, { "epoch": 0.04867591424968474, "grad_norm": 0.05329260602593422, "learning_rate": 1.887714378583587e-06, "loss": 6.9749, "step": 35380 }, { "epoch": 0.0489281210592686, "grad_norm": 0.06424757093191147, "learning_rate": 1.8789885932309136e-06, "loss": 6.9755, "step": 35390 }, { "epoch": 0.04918032786885246, "grad_norm": 0.05430705100297928, "learning_rate": 1.8702823696682614e-06, "loss": 6.9679, "step": 35400 }, { "epoch": 0.04943253467843632, "grad_norm": 0.05537562817335129, "learning_rate": 1.861595713951919e-06, "loss": 6.9688, "step": 35410 }, { "epoch": 0.04968474148802018, "grad_norm": 0.056035272777080536, "learning_rate": 1.8529286321245864e-06, "loss": 6.9689, "step": 35420 }, { "epoch": 0.049936948297604034, "grad_norm": 0.0643603503704071, "learning_rate": 1.844281130215334e-06, "loss": 6.9761, "step": 35430 }, { "epoch": 0.0501891551071879, "grad_norm": 0.05603364109992981, "learning_rate": 1.8356532142396031e-06, "loss": 6.9758, "step": 35440 }, { "epoch": 0.05044136191677175, "grad_norm": 0.06188530474901199, "learning_rate": 1.8270448901992366e-06, "loss": 6.9737, "step": 35450 }, { "epoch": 0.05069356872635561, "grad_norm": 0.04376628249883652, "learning_rate": 1.8184561640824204e-06, "loss": 6.9637, "step": 35460 }, { "epoch": 0.05094577553593947, "grad_norm": 0.043645236641168594, "learning_rate": 1.809887041863736e-06, "loss": 6.9569, "step": 35470 }, { "epoch": 0.05119798234552333, "grad_norm": 0.04710662364959717, "learning_rate": 1.8013375295041046e-06, "loss": 6.9641, "step": 35480 }, { "epoch": 0.05145018915510719, "grad_norm": 0.056112341582775116, "learning_rate": 1.7928076329508192e-06, "loss": 6.9741, "step": 35490 }, { "epoch": 0.05170239596469105, "grad_norm": 0.05628994479775429, "learning_rate": 1.78429735813751e-06, "loss": 6.973, "step": 35500 }, { "epoch": 0.051954602774274904, "grad_norm": 0.04570821300148964, "learning_rate": 1.775806710984188e-06, "loss": 6.9729, "step": 35510 }, { "epoch": 0.05220680958385877, "grad_norm": 0.05017935857176781, "learning_rate": 1.7673356973971788e-06, "loss": 6.9619, "step": 35520 }, { "epoch": 0.05245901639344262, "grad_norm": 0.04224899411201477, "learning_rate": 1.758884323269182e-06, "loss": 6.9614, "step": 35530 }, { "epoch": 0.05271122320302648, "grad_norm": 0.05653410404920578, "learning_rate": 1.7504525944791983e-06, "loss": 6.9694, "step": 35540 }, { "epoch": 0.05296343001261034, "grad_norm": 0.05784852057695389, "learning_rate": 1.7420405168925957e-06, "loss": 6.9708, "step": 35550 }, { "epoch": 0.0532156368221942, "grad_norm": 0.05750899761915207, "learning_rate": 1.7336480963610569e-06, "loss": 6.9702, "step": 35560 }, { "epoch": 0.05346784363177806, "grad_norm": 0.06279376894235611, "learning_rate": 1.725275338722585e-06, "loss": 6.9754, "step": 35570 }, { "epoch": 0.05372005044136192, "grad_norm": 0.06630241125822067, "learning_rate": 1.7169222498015248e-06, "loss": 6.982, "step": 35580 }, { "epoch": 0.053972257250945774, "grad_norm": 0.05906856432557106, "learning_rate": 1.7085888354085211e-06, "loss": 6.9769, "step": 35590 }, { "epoch": 0.05422446406052964, "grad_norm": 0.06108551472425461, "learning_rate": 1.7002751013405338e-06, "loss": 6.9739, "step": 35600 }, { "epoch": 0.05447667087011349, "grad_norm": 0.06447786837816238, "learning_rate": 1.69198105338084e-06, "loss": 6.9747, "step": 35610 }, { "epoch": 0.05472887767969735, "grad_norm": 0.06034597009420395, "learning_rate": 1.6837066972990246e-06, "loss": 6.9745, "step": 35620 }, { "epoch": 0.05498108448928121, "grad_norm": 0.050912849605083466, "learning_rate": 1.6754520388509663e-06, "loss": 6.9701, "step": 35630 }, { "epoch": 0.05523329129886507, "grad_norm": 0.05876642465591431, "learning_rate": 1.6672170837788459e-06, "loss": 6.9683, "step": 35640 }, { "epoch": 0.05548549810844893, "grad_norm": 0.06378325819969177, "learning_rate": 1.6590018378111271e-06, "loss": 6.978, "step": 35650 }, { "epoch": 0.05573770491803279, "grad_norm": 0.06535457819700241, "learning_rate": 1.6508063066625855e-06, "loss": 6.9802, "step": 35660 }, { "epoch": 0.055989911727616644, "grad_norm": 0.0605001375079155, "learning_rate": 1.6426304960342608e-06, "loss": 6.9783, "step": 35670 }, { "epoch": 0.05624211853720051, "grad_norm": 0.06158548966050148, "learning_rate": 1.6344744116134935e-06, "loss": 6.9806, "step": 35680 }, { "epoch": 0.05649432534678436, "grad_norm": 0.05915416032075882, "learning_rate": 1.6263380590738919e-06, "loss": 6.9745, "step": 35690 }, { "epoch": 0.05674653215636822, "grad_norm": 0.052964840084314346, "learning_rate": 1.6182214440753219e-06, "loss": 6.9706, "step": 35700 }, { "epoch": 0.05699873896595208, "grad_norm": 0.04924292862415314, "learning_rate": 1.6101245722639535e-06, "loss": 6.9714, "step": 35710 }, { "epoch": 0.05725094577553594, "grad_norm": 0.067033551633358, "learning_rate": 1.6020474492721948e-06, "loss": 6.9745, "step": 35720 }, { "epoch": 0.0575031525851198, "grad_norm": 0.06025705114006996, "learning_rate": 1.5939900807187313e-06, "loss": 6.9809, "step": 35730 }, { "epoch": 0.05775535939470366, "grad_norm": 0.04740103706717491, "learning_rate": 1.5859524722085062e-06, "loss": 6.9712, "step": 35740 }, { "epoch": 0.058007566204287514, "grad_norm": 0.043362148106098175, "learning_rate": 1.5779346293327034e-06, "loss": 6.9647, "step": 35750 }, { "epoch": 0.05825977301387138, "grad_norm": 0.05961433798074722, "learning_rate": 1.5699365576687718e-06, "loss": 6.973, "step": 35760 }, { "epoch": 0.05851197982345523, "grad_norm": 0.05776470527052879, "learning_rate": 1.5619582627804007e-06, "loss": 6.9766, "step": 35770 }, { "epoch": 0.05876418663303909, "grad_norm": 0.06536798924207687, "learning_rate": 1.5539997502175208e-06, "loss": 6.9775, "step": 35780 }, { "epoch": 0.05901639344262295, "grad_norm": 0.05253615602850914, "learning_rate": 1.5460610255163166e-06, "loss": 6.9782, "step": 35790 }, { "epoch": 0.05926860025220681, "grad_norm": 0.05941998213529587, "learning_rate": 1.5381420941991774e-06, "loss": 6.9746, "step": 35800 }, { "epoch": 0.05952080706179067, "grad_norm": 0.04916978254914284, "learning_rate": 1.530242961774747e-06, "loss": 6.9647, "step": 35810 }, { "epoch": 0.05977301387137453, "grad_norm": 0.06002652645111084, "learning_rate": 1.5223636337378965e-06, "loss": 6.9716, "step": 35820 }, { "epoch": 0.060025220680958384, "grad_norm": 0.056362085044384, "learning_rate": 1.514504115569705e-06, "loss": 6.974, "step": 35830 }, { "epoch": 0.06027742749054225, "grad_norm": 0.057259947061538696, "learning_rate": 1.5066644127374919e-06, "loss": 6.9754, "step": 35840 }, { "epoch": 0.0605296343001261, "grad_norm": 0.0606803335249424, "learning_rate": 1.4988445306947785e-06, "loss": 6.9724, "step": 35850 }, { "epoch": 0.06078184110970996, "grad_norm": 0.062078092247247696, "learning_rate": 1.4910444748812935e-06, "loss": 6.9738, "step": 35860 }, { "epoch": 0.06103404791929382, "grad_norm": 0.049627892673015594, "learning_rate": 1.4832642507229866e-06, "loss": 6.9654, "step": 35870 }, { "epoch": 0.06128625472887768, "grad_norm": 0.046167418360710144, "learning_rate": 1.4755038636320118e-06, "loss": 6.9653, "step": 35880 }, { "epoch": 0.06153846153846154, "grad_norm": 0.062103599309921265, "learning_rate": 1.4677633190067142e-06, "loss": 6.9696, "step": 35890 }, { "epoch": 0.0617906683480454, "grad_norm": 0.061979953199625015, "learning_rate": 1.4600426222316398e-06, "loss": 6.9738, "step": 35900 }, { "epoch": 0.062042875157629254, "grad_norm": 0.05763555318117142, "learning_rate": 1.4523417786775296e-06, "loss": 6.9757, "step": 35910 }, { "epoch": 0.06229508196721312, "grad_norm": 0.05809900537133217, "learning_rate": 1.444660793701318e-06, "loss": 6.9753, "step": 35920 }, { "epoch": 0.06254728877679698, "grad_norm": 0.06091311201453209, "learning_rate": 1.4369996726461142e-06, "loss": 6.9783, "step": 35930 }, { "epoch": 0.06279949558638083, "grad_norm": 0.0640452578663826, "learning_rate": 1.429358420841216e-06, "loss": 6.9773, "step": 35940 }, { "epoch": 0.06305170239596469, "grad_norm": 0.06372689455747604, "learning_rate": 1.4217370436021105e-06, "loss": 6.9797, "step": 35950 }, { "epoch": 0.06330390920554856, "grad_norm": 0.06926480680704117, "learning_rate": 1.414135546230434e-06, "loss": 6.9805, "step": 35960 }, { "epoch": 0.0635561160151324, "grad_norm": 0.06322573870420456, "learning_rate": 1.4065539340140165e-06, "loss": 6.9796, "step": 35970 }, { "epoch": 0.06380832282471627, "grad_norm": 0.05067646503448486, "learning_rate": 1.3989922122268394e-06, "loss": 6.9817, "step": 35980 }, { "epoch": 0.06406052963430013, "grad_norm": 0.053131602704524994, "learning_rate": 1.3914503861290683e-06, "loss": 6.9684, "step": 35990 }, { "epoch": 0.06431273644388398, "grad_norm": 0.04057972505688667, "learning_rate": 1.3839284609670066e-06, "loss": 6.9638, "step": 36000 }, { "epoch": 0.06456494325346784, "grad_norm": 0.027721015736460686, "learning_rate": 1.3771757479461044e-06, "loss": 6.9501, "step": 36010 }, { "epoch": 0.0648171500630517, "grad_norm": 0.04555834084749222, "learning_rate": 1.3696916489659028e-06, "loss": 6.9628, "step": 36020 }, { "epoch": 0.06506935687263556, "grad_norm": 0.05329059809446335, "learning_rate": 1.3622274660574163e-06, "loss": 6.9619, "step": 36030 }, { "epoch": 0.06532156368221942, "grad_norm": 0.05368724837899208, "learning_rate": 1.3547832044129548e-06, "loss": 6.9691, "step": 36040 }, { "epoch": 0.06557377049180328, "grad_norm": 0.06612929701805115, "learning_rate": 1.347358869210953e-06, "loss": 6.9745, "step": 36050 }, { "epoch": 0.06582597730138713, "grad_norm": 0.05810130015015602, "learning_rate": 1.3399544656160057e-06, "loss": 6.9747, "step": 36060 }, { "epoch": 0.066078184110971, "grad_norm": 0.06009245291352272, "learning_rate": 1.3325699987788199e-06, "loss": 6.9751, "step": 36070 }, { "epoch": 0.06633039092055486, "grad_norm": 0.06301013380289078, "learning_rate": 1.3252054738362463e-06, "loss": 6.9748, "step": 36080 }, { "epoch": 0.06658259773013872, "grad_norm": 0.06606108695268631, "learning_rate": 1.317860895911267e-06, "loss": 6.9796, "step": 36090 }, { "epoch": 0.06683480453972257, "grad_norm": 0.054345931857824326, "learning_rate": 1.3105362701129752e-06, "loss": 6.9766, "step": 36100 }, { "epoch": 0.06708701134930643, "grad_norm": 0.05369538441300392, "learning_rate": 1.3032316015365986e-06, "loss": 6.97, "step": 36110 }, { "epoch": 0.0673392181588903, "grad_norm": 0.05504725128412247, "learning_rate": 1.295946895263479e-06, "loss": 6.9717, "step": 36120 }, { "epoch": 0.06759142496847415, "grad_norm": 0.052894867956638336, "learning_rate": 1.288682156361063e-06, "loss": 6.9671, "step": 36130 }, { "epoch": 0.06784363177805801, "grad_norm": 0.060158245265483856, "learning_rate": 1.2814373898829147e-06, "loss": 6.9756, "step": 36140 }, { "epoch": 0.06809583858764187, "grad_norm": 0.0658879354596138, "learning_rate": 1.2742126008687028e-06, "loss": 6.9789, "step": 36150 }, { "epoch": 0.06834804539722572, "grad_norm": 0.05808493494987488, "learning_rate": 1.2670077943442037e-06, "loss": 6.9777, "step": 36160 }, { "epoch": 0.06860025220680958, "grad_norm": 0.05895955115556717, "learning_rate": 1.2598229753212887e-06, "loss": 6.9738, "step": 36170 }, { "epoch": 0.06885245901639345, "grad_norm": 0.05554460734128952, "learning_rate": 1.2526581487979227e-06, "loss": 6.9732, "step": 36180 }, { "epoch": 0.0691046658259773, "grad_norm": 0.06617891788482666, "learning_rate": 1.2455133197581658e-06, "loss": 6.9722, "step": 36190 }, { "epoch": 0.06935687263556116, "grad_norm": 0.03888672590255737, "learning_rate": 1.2383884931721723e-06, "loss": 6.9595, "step": 36200 }, { "epoch": 0.06960907944514502, "grad_norm": 0.048166424036026, "learning_rate": 1.2312836739961742e-06, "loss": 6.9549, "step": 36210 }, { "epoch": 0.06986128625472887, "grad_norm": 0.06028318405151367, "learning_rate": 1.224198867172498e-06, "loss": 6.9644, "step": 36220 }, { "epoch": 0.07011349306431273, "grad_norm": 0.03562524542212486, "learning_rate": 1.2171340776295249e-06, "loss": 6.9634, "step": 36230 }, { "epoch": 0.0703656998738966, "grad_norm": 0.0489937961101532, "learning_rate": 1.2100893102817401e-06, "loss": 6.9591, "step": 36240 }, { "epoch": 0.07061790668348046, "grad_norm": 0.045674197375774384, "learning_rate": 1.2030645700296838e-06, "loss": 6.9602, "step": 36250 }, { "epoch": 0.07087011349306431, "grad_norm": 0.04796242341399193, "learning_rate": 1.1960598617599671e-06, "loss": 6.9621, "step": 36260 }, { "epoch": 0.07112232030264817, "grad_norm": 0.04890645295381546, "learning_rate": 1.1890751903452724e-06, "loss": 6.9676, "step": 36270 }, { "epoch": 0.07137452711223204, "grad_norm": 0.05518052726984024, "learning_rate": 1.1821105606443361e-06, "loss": 6.9682, "step": 36280 }, { "epoch": 0.07162673392181589, "grad_norm": 0.05198278650641441, "learning_rate": 1.1751659775019563e-06, "loss": 6.9718, "step": 36290 }, { "epoch": 0.07187894073139975, "grad_norm": 0.06426186114549637, "learning_rate": 1.1682414457489855e-06, "loss": 6.9785, "step": 36300 }, { "epoch": 0.07213114754098361, "grad_norm": 0.06777923554182053, "learning_rate": 1.1613369702023336e-06, "loss": 6.9796, "step": 36310 }, { "epoch": 0.07238335435056746, "grad_norm": 0.06948103755712509, "learning_rate": 1.1544525556649487e-06, "loss": 6.9807, "step": 36320 }, { "epoch": 0.07263556116015132, "grad_norm": 0.053901322185993195, "learning_rate": 1.147588206925827e-06, "loss": 6.9807, "step": 36330 }, { "epoch": 0.07288776796973519, "grad_norm": 0.053259458392858505, "learning_rate": 1.1407439287600118e-06, "loss": 6.968, "step": 36340 }, { "epoch": 0.07313997477931904, "grad_norm": 0.05478078871965408, "learning_rate": 1.133919725928585e-06, "loss": 6.9678, "step": 36350 }, { "epoch": 0.0733921815889029, "grad_norm": 0.0629095509648323, "learning_rate": 1.127115603178649e-06, "loss": 6.9727, "step": 36360 }, { "epoch": 0.07364438839848676, "grad_norm": 0.06778573989868164, "learning_rate": 1.120331565243361e-06, "loss": 6.9801, "step": 36370 }, { "epoch": 0.07389659520807061, "grad_norm": 0.05600986257195473, "learning_rate": 1.1135676168418896e-06, "loss": 6.9765, "step": 36380 }, { "epoch": 0.07414880201765447, "grad_norm": 0.06684261560440063, "learning_rate": 1.106823762679431e-06, "loss": 6.977, "step": 36390 }, { "epoch": 0.07440100882723834, "grad_norm": 0.06634944677352905, "learning_rate": 1.1001000074472023e-06, "loss": 6.9789, "step": 36400 }, { "epoch": 0.0746532156368222, "grad_norm": 0.05875572934746742, "learning_rate": 1.093396355822449e-06, "loss": 6.9748, "step": 36410 }, { "epoch": 0.07490542244640605, "grad_norm": 0.0554349347949028, "learning_rate": 1.0867128124684279e-06, "loss": 6.9718, "step": 36420 }, { "epoch": 0.07515762925598991, "grad_norm": 0.03326117619872093, "learning_rate": 1.080049382034396e-06, "loss": 6.9567, "step": 36430 }, { "epoch": 0.07540983606557378, "grad_norm": 0.062230877578258514, "learning_rate": 1.0734060691556358e-06, "loss": 6.9589, "step": 36440 }, { "epoch": 0.07566204287515763, "grad_norm": 0.0639413371682167, "learning_rate": 1.0667828784534206e-06, "loss": 6.9764, "step": 36450 }, { "epoch": 0.07591424968474149, "grad_norm": 0.0600866936147213, "learning_rate": 1.0601798145350416e-06, "loss": 6.9745, "step": 36460 }, { "epoch": 0.07616645649432535, "grad_norm": 0.0574871264398098, "learning_rate": 1.053596881993778e-06, "loss": 6.9742, "step": 36470 }, { "epoch": 0.0764186633039092, "grad_norm": 0.06414974480867386, "learning_rate": 1.0470340854089134e-06, "loss": 6.9764, "step": 36480 }, { "epoch": 0.07667087011349306, "grad_norm": 0.05264904722571373, "learning_rate": 1.0404914293457057e-06, "loss": 6.9759, "step": 36490 }, { "epoch": 0.07692307692307693, "grad_norm": 0.058397743850946426, "learning_rate": 1.033968918355428e-06, "loss": 6.972, "step": 36500 }, { "epoch": 0.07717528373266078, "grad_norm": 0.06208815798163414, "learning_rate": 1.0274665569753238e-06, "loss": 6.9737, "step": 36510 }, { "epoch": 0.07742749054224464, "grad_norm": 0.0568314827978611, "learning_rate": 1.0209843497286253e-06, "loss": 6.9759, "step": 36520 }, { "epoch": 0.0776796973518285, "grad_norm": 0.05346301570534706, "learning_rate": 1.0145223011245453e-06, "loss": 6.9684, "step": 36530 }, { "epoch": 0.07793190416141235, "grad_norm": 0.052585750818252563, "learning_rate": 1.008080415658268e-06, "loss": 6.9656, "step": 36540 }, { "epoch": 0.07818411097099622, "grad_norm": 0.06628488749265671, "learning_rate": 1.0016586978109587e-06, "loss": 6.9683, "step": 36550 }, { "epoch": 0.07843631778058008, "grad_norm": 0.06272778660058975, "learning_rate": 9.952571520497444e-07, "loss": 6.9755, "step": 36560 }, { "epoch": 0.07868852459016394, "grad_norm": 0.05862300843000412, "learning_rate": 9.888757828277329e-07, "loss": 6.9737, "step": 36570 }, { "epoch": 0.07894073139974779, "grad_norm": 0.056771211326122284, "learning_rate": 9.825145945839897e-07, "loss": 6.9744, "step": 36580 }, { "epoch": 0.07919293820933165, "grad_norm": 0.05183292180299759, "learning_rate": 9.761735917435356e-07, "loss": 6.973, "step": 36590 }, { "epoch": 0.07944514501891552, "grad_norm": 0.06223604083061218, "learning_rate": 9.69852778717355e-07, "loss": 6.9728, "step": 36600 }, { "epoch": 0.07969735182849937, "grad_norm": 0.05201101675629616, "learning_rate": 9.635521599023977e-07, "loss": 6.9725, "step": 36610 }, { "epoch": 0.07994955863808323, "grad_norm": 0.05786752328276634, "learning_rate": 9.572717396815477e-07, "loss": 6.9754, "step": 36620 }, { "epoch": 0.08020176544766709, "grad_norm": 0.039316412061452866, "learning_rate": 9.510115224236538e-07, "loss": 6.9696, "step": 36630 }, { "epoch": 0.08045397225725094, "grad_norm": 0.04706139117479324, "learning_rate": 9.447715124834988e-07, "loss": 6.9587, "step": 36640 }, { "epoch": 0.0807061790668348, "grad_norm": 0.045443445444107056, "learning_rate": 9.385517142018207e-07, "loss": 6.9664, "step": 36650 }, { "epoch": 0.08095838587641867, "grad_norm": 0.04944414272904396, "learning_rate": 9.323521319052819e-07, "loss": 6.9646, "step": 36660 }, { "epoch": 0.08121059268600252, "grad_norm": 0.05442950129508972, "learning_rate": 9.261727699065025e-07, "loss": 6.9737, "step": 36670 }, { "epoch": 0.08146279949558638, "grad_norm": 0.058966606855392456, "learning_rate": 9.200136325040176e-07, "loss": 6.9719, "step": 36680 }, { "epoch": 0.08171500630517024, "grad_norm": 0.06745368987321854, "learning_rate": 9.138747239823064e-07, "loss": 6.9784, "step": 36690 }, { "epoch": 0.08196721311475409, "grad_norm": 0.05926361307501793, "learning_rate": 9.077560486117664e-07, "loss": 6.9754, "step": 36700 }, { "epoch": 0.08221941992433796, "grad_norm": 0.058405764400959015, "learning_rate": 9.016576106487296e-07, "loss": 6.9755, "step": 36710 }, { "epoch": 0.08247162673392182, "grad_norm": 0.05991512909531593, "learning_rate": 8.955794143354423e-07, "loss": 6.9727, "step": 36720 }, { "epoch": 0.08272383354350568, "grad_norm": 0.0555274561047554, "learning_rate": 8.895214639000793e-07, "loss": 6.9753, "step": 36730 }, { "epoch": 0.08297604035308953, "grad_norm": 0.0643339678645134, "learning_rate": 8.834837635567261e-07, "loss": 6.9772, "step": 36740 }, { "epoch": 0.0832282471626734, "grad_norm": 0.06487143784761429, "learning_rate": 8.774663175053732e-07, "loss": 6.9802, "step": 36750 }, { "epoch": 0.00025220680958385876, "grad_norm": 0.05970239266753197, "learning_rate": 8.714691299319389e-07, "loss": 6.9788, "step": 36760 }, { "epoch": 0.0005044136191677175, "grad_norm": 0.04112258180975914, "learning_rate": 8.654922050082359e-07, "loss": 6.9632, "step": 36770 }, { "epoch": 0.0007566204287515763, "grad_norm": 0.051812440156936646, "learning_rate": 8.595355468919918e-07, "loss": 6.9595, "step": 36780 }, { "epoch": 0.001008827238335435, "grad_norm": 0.08404647558927536, "learning_rate": 8.535991597268289e-07, "loss": 6.973, "step": 36790 }, { "epoch": 0.0012610340479192938, "grad_norm": 0.03817914426326752, "learning_rate": 8.47683047642267e-07, "loss": 6.9703, "step": 36800 }, { "epoch": 0.0015132408575031526, "grad_norm": 0.03519069775938988, "learning_rate": 8.417872147537242e-07, "loss": 6.9458, "step": 36810 }, { "epoch": 0.0017654476670870113, "grad_norm": 0.0359061025083065, "learning_rate": 8.359116651625132e-07, "loss": 6.9504, "step": 36820 }, { "epoch": 0.00201765447667087, "grad_norm": 0.061768777668476105, "learning_rate": 8.300564029558444e-07, "loss": 6.9659, "step": 36830 }, { "epoch": 0.002269861286254729, "grad_norm": 0.05887386202812195, "learning_rate": 8.242214322067964e-07, "loss": 6.9765, "step": 36840 }, { "epoch": 0.0025220680958385876, "grad_norm": 0.06391030550003052, "learning_rate": 8.184067569743525e-07, "loss": 6.9778, "step": 36850 }, { "epoch": 0.0027742749054224464, "grad_norm": 0.06429633498191833, "learning_rate": 8.126123813033604e-07, "loss": 6.9756, "step": 36860 }, { "epoch": 0.003026481715006305, "grad_norm": 0.053911175578832626, "learning_rate": 8.068383092245657e-07, "loss": 6.9728, "step": 36870 }, { "epoch": 0.003278688524590164, "grad_norm": 0.06076071038842201, "learning_rate": 8.010845447545723e-07, "loss": 6.9728, "step": 36880 }, { "epoch": 0.0035308953341740227, "grad_norm": 0.057595010846853256, "learning_rate": 7.953510918958684e-07, "loss": 6.9785, "step": 36890 }, { "epoch": 0.0037831021437578815, "grad_norm": 0.06743182986974716, "learning_rate": 7.896379546368137e-07, "loss": 6.9808, "step": 36900 }, { "epoch": 0.00403530895334174, "grad_norm": 0.06097247824072838, "learning_rate": 7.839451369516259e-07, "loss": 6.9758, "step": 36910 }, { "epoch": 0.004287515762925599, "grad_norm": 0.06665864586830139, "learning_rate": 7.782726428003972e-07, "loss": 6.9754, "step": 36920 }, { "epoch": 0.004539722572509458, "grad_norm": 0.056636661291122437, "learning_rate": 7.726204761290745e-07, "loss": 6.971, "step": 36930 }, { "epoch": 0.0047919293820933165, "grad_norm": 0.059308476746082306, "learning_rate": 7.669886408694727e-07, "loss": 6.9711, "step": 36940 }, { "epoch": 0.005044136191677175, "grad_norm": 0.06131893768906593, "learning_rate": 7.613771409392612e-07, "loss": 6.9711, "step": 36950 }, { "epoch": 0.005296343001261034, "grad_norm": 0.06334245204925537, "learning_rate": 7.557859802419609e-07, "loss": 6.9754, "step": 36960 }, { "epoch": 0.005548549810844893, "grad_norm": 0.05851975455880165, "learning_rate": 7.502151626669373e-07, "loss": 6.9769, "step": 36970 }, { "epoch": 0.005800756620428752, "grad_norm": 0.059973184019327164, "learning_rate": 7.44664692089424e-07, "loss": 6.9746, "step": 36980 }, { "epoch": 0.00605296343001261, "grad_norm": 0.060825951397418976, "learning_rate": 7.39134572370479e-07, "loss": 6.9761, "step": 36990 }, { "epoch": 0.006305170239596469, "grad_norm": 0.05691930651664734, "learning_rate": 7.336248073570251e-07, "loss": 6.974, "step": 37000 }, { "epoch": 0.006557377049180328, "grad_norm": 0.0548112615942955, "learning_rate": 7.281354008818031e-07, "loss": 6.9758, "step": 37010 }, { "epoch": 0.006809583858764187, "grad_norm": 0.061814311891794205, "learning_rate": 7.226663567634085e-07, "loss": 6.9734, "step": 37020 }, { "epoch": 0.007061790668348045, "grad_norm": 0.06080836430191994, "learning_rate": 7.172176788062679e-07, "loss": 6.9759, "step": 37030 }, { "epoch": 0.007313997477931904, "grad_norm": 0.05189678445458412, "learning_rate": 7.117893708006362e-07, "loss": 6.9717, "step": 37040 }, { "epoch": 0.007566204287515763, "grad_norm": 0.05803954601287842, "learning_rate": 7.063814365226062e-07, "loss": 6.9674, "step": 37050 }, { "epoch": 0.007818411097099623, "grad_norm": 0.05319157615303993, "learning_rate": 7.009938797340887e-07, "loss": 6.974, "step": 37060 }, { "epoch": 0.00807061790668348, "grad_norm": 0.03871021047234535, "learning_rate": 6.956267041828257e-07, "loss": 6.9677, "step": 37070 }, { "epoch": 0.00832282471626734, "grad_norm": 0.0661730244755745, "learning_rate": 6.902799136023808e-07, "loss": 6.9784, "step": 37080 }, { "epoch": 0.008575031525851198, "grad_norm": 0.05766197293996811, "learning_rate": 6.849535117121386e-07, "loss": 6.9751, "step": 37090 }, { "epoch": 0.008827238335435058, "grad_norm": 0.06414837390184402, "learning_rate": 6.796475022172921e-07, "loss": 6.9764, "step": 37100 }, { "epoch": 0.009079445145018916, "grad_norm": 0.06797587126493454, "learning_rate": 6.74361888808862e-07, "loss": 6.9758, "step": 37110 }, { "epoch": 0.009331651954602775, "grad_norm": 0.06051556020975113, "learning_rate": 6.690966751636707e-07, "loss": 6.9765, "step": 37120 }, { "epoch": 0.009583858764186633, "grad_norm": 0.05539194121956825, "learning_rate": 6.638518649443548e-07, "loss": 6.9754, "step": 37130 }, { "epoch": 0.009836065573770493, "grad_norm": 0.057992611080408096, "learning_rate": 6.586274617993492e-07, "loss": 6.9762, "step": 37140 }, { "epoch": 0.01008827238335435, "grad_norm": 0.05841589719057083, "learning_rate": 6.534234693629138e-07, "loss": 6.9738, "step": 37150 }, { "epoch": 0.01034047919293821, "grad_norm": 0.06515347957611084, "learning_rate": 6.482398912550857e-07, "loss": 6.9767, "step": 37160 }, { "epoch": 0.010592686002522068, "grad_norm": 0.061396777629852295, "learning_rate": 6.430767310817176e-07, "loss": 6.9787, "step": 37170 }, { "epoch": 0.010844892812105928, "grad_norm": 0.04799060523509979, "learning_rate": 6.379339924344463e-07, "loss": 6.9688, "step": 37180 }, { "epoch": 0.011097099621689786, "grad_norm": 0.04546065628528595, "learning_rate": 6.328116788907168e-07, "loss": 6.961, "step": 37190 }, { "epoch": 0.011349306431273645, "grad_norm": 0.05649843439459801, "learning_rate": 6.277097940137555e-07, "loss": 6.9688, "step": 37200 }, { "epoch": 0.011601513240857503, "grad_norm": 0.06673871725797653, "learning_rate": 6.226283413525802e-07, "loss": 6.9748, "step": 37210 }, { "epoch": 0.011853720050441363, "grad_norm": 0.05711662769317627, "learning_rate": 6.175673244420033e-07, "loss": 6.9751, "step": 37220 }, { "epoch": 0.01210592686002522, "grad_norm": 0.054094139486551285, "learning_rate": 6.125267468026019e-07, "loss": 6.9742, "step": 37230 }, { "epoch": 0.01235813366960908, "grad_norm": 0.050132717937231064, "learning_rate": 6.075066119407613e-07, "loss": 6.9701, "step": 37240 }, { "epoch": 0.012610340479192938, "grad_norm": 0.03482967987656593, "learning_rate": 6.025069233486246e-07, "loss": 6.9677, "step": 37250 }, { "epoch": 0.012862547288776798, "grad_norm": 0.054785698652267456, "learning_rate": 5.975276845041266e-07, "loss": 6.9674, "step": 37260 }, { "epoch": 0.013114754098360656, "grad_norm": 0.052708033472299576, "learning_rate": 5.925688988709599e-07, "loss": 6.9745, "step": 37270 }, { "epoch": 0.013366960907944515, "grad_norm": 0.039131153374910355, "learning_rate": 5.876305698986084e-07, "loss": 6.9666, "step": 37280 }, { "epoch": 0.013619167717528373, "grad_norm": 0.036579713225364685, "learning_rate": 5.827127010223143e-07, "loss": 6.9662, "step": 37290 }, { "epoch": 0.013871374527112233, "grad_norm": 0.04592743515968323, "learning_rate": 5.778152956630844e-07, "loss": 6.9596, "step": 37300 }, { "epoch": 0.01412358133669609, "grad_norm": 0.042441047728061676, "learning_rate": 5.72938357227707e-07, "loss": 6.9578, "step": 37310 }, { "epoch": 0.01437578814627995, "grad_norm": 0.06444749236106873, "learning_rate": 5.680818891087113e-07, "loss": 6.9767, "step": 37320 }, { "epoch": 0.014627994955863808, "grad_norm": 0.06654045730829239, "learning_rate": 5.632458946844021e-07, "loss": 6.9796, "step": 37330 }, { "epoch": 0.014880201765447668, "grad_norm": 0.06455570459365845, "learning_rate": 5.584303773188348e-07, "loss": 6.9783, "step": 37340 }, { "epoch": 0.015132408575031526, "grad_norm": 0.054268475621938705, "learning_rate": 5.536353403618233e-07, "loss": 6.9766, "step": 37350 }, { "epoch": 0.015384615384615385, "grad_norm": 0.056222233921289444, "learning_rate": 5.488607871489393e-07, "loss": 6.9758, "step": 37360 }, { "epoch": 0.015636822194199245, "grad_norm": 0.043820735067129135, "learning_rate": 5.441067210014927e-07, "loss": 6.9661, "step": 37370 }, { "epoch": 0.0158890290037831, "grad_norm": 0.060940273106098175, "learning_rate": 5.393731452265515e-07, "loss": 6.9646, "step": 37380 }, { "epoch": 0.01614123581336696, "grad_norm": 0.06833166629076004, "learning_rate": 5.346600631169319e-07, "loss": 6.9795, "step": 37390 }, { "epoch": 0.01639344262295082, "grad_norm": 0.053600527346134186, "learning_rate": 5.299674779511843e-07, "loss": 6.9767, "step": 37400 }, { "epoch": 0.01664564943253468, "grad_norm": 0.05557575821876526, "learning_rate": 5.25295392993611e-07, "loss": 6.9745, "step": 37410 }, { "epoch": 0.016897856242118536, "grad_norm": 0.05751103162765503, "learning_rate": 5.206438114942557e-07, "loss": 6.9743, "step": 37420 }, { "epoch": 0.017150063051702396, "grad_norm": 0.0666055753827095, "learning_rate": 5.160127366888801e-07, "loss": 6.9747, "step": 37430 }, { "epoch": 0.017402269861286256, "grad_norm": 0.06161627918481827, "learning_rate": 5.114021717990037e-07, "loss": 6.9765, "step": 37440 }, { "epoch": 0.017654476670870115, "grad_norm": 0.06165776401758194, "learning_rate": 5.068121200318609e-07, "loss": 6.9752, "step": 37450 }, { "epoch": 0.01790668348045397, "grad_norm": 0.05839773640036583, "learning_rate": 5.022425845804313e-07, "loss": 6.9754, "step": 37460 }, { "epoch": 0.01815889029003783, "grad_norm": 0.05700582638382912, "learning_rate": 4.976935686234186e-07, "loss": 6.9736, "step": 37470 }, { "epoch": 0.01841109709962169, "grad_norm": 0.05397585406899452, "learning_rate": 4.931650753252414e-07, "loss": 6.9713, "step": 37480 }, { "epoch": 0.01866330390920555, "grad_norm": 0.0550408698618412, "learning_rate": 4.886571078360535e-07, "loss": 6.9719, "step": 37490 }, { "epoch": 0.018915510718789406, "grad_norm": 0.06610148400068283, "learning_rate": 4.84169669291733e-07, "loss": 6.9754, "step": 37500 }, { "epoch": 0.019167717528373266, "grad_norm": 0.06704223901033401, "learning_rate": 4.797027628138662e-07, "loss": 6.9794, "step": 37510 }, { "epoch": 0.019419924337957126, "grad_norm": 0.06263812631368637, "learning_rate": 4.752563915097674e-07, "loss": 6.9791, "step": 37520 }, { "epoch": 0.019672131147540985, "grad_norm": 0.07266273349523544, "learning_rate": 4.7083055847245926e-07, "loss": 6.9824, "step": 37530 }, { "epoch": 0.01992433795712484, "grad_norm": 0.04731534421443939, "learning_rate": 4.664252667806723e-07, "loss": 6.9629, "step": 37540 }, { "epoch": 0.0201765447667087, "grad_norm": 0.04123660549521446, "learning_rate": 4.6204051949886863e-07, "loss": 6.9583, "step": 37550 }, { "epoch": 0.02042875157629256, "grad_norm": 0.06089460477232933, "learning_rate": 4.576763196771916e-07, "loss": 6.9635, "step": 37560 }, { "epoch": 0.02068095838587642, "grad_norm": 0.05946940556168556, "learning_rate": 4.533326703515162e-07, "loss": 6.9797, "step": 37570 }, { "epoch": 0.020933165195460277, "grad_norm": 0.06743773072957993, "learning_rate": 4.4900957454340865e-07, "loss": 6.9745, "step": 37580 }, { "epoch": 0.021185372005044136, "grad_norm": 0.05690634623169899, "learning_rate": 4.4470703526013346e-07, "loss": 6.9757, "step": 37590 }, { "epoch": 0.021437578814627996, "grad_norm": 0.06389579176902771, "learning_rate": 4.4042505549466315e-07, "loss": 6.9784, "step": 37600 }, { "epoch": 0.021689785624211855, "grad_norm": 0.05489001423120499, "learning_rate": 4.3616363822567173e-07, "loss": 6.9771, "step": 37610 }, { "epoch": 0.02194199243379571, "grad_norm": 0.06197235360741615, "learning_rate": 4.319227864175246e-07, "loss": 6.9734, "step": 37620 }, { "epoch": 0.02219419924337957, "grad_norm": 0.06423825770616531, "learning_rate": 4.2770250302027524e-07, "loss": 6.9783, "step": 37630 }, { "epoch": 0.02244640605296343, "grad_norm": 0.05459511652588844, "learning_rate": 4.235027909696754e-07, "loss": 6.9691, "step": 37640 }, { "epoch": 0.02269861286254729, "grad_norm": 0.03955674171447754, "learning_rate": 4.193236531871747e-07, "loss": 6.9562, "step": 37650 }, { "epoch": 0.022950819672131147, "grad_norm": 0.0522860586643219, "learning_rate": 4.1516509257989444e-07, "loss": 6.96, "step": 37660 }, { "epoch": 0.023203026481715006, "grad_norm": 0.03772783279418945, "learning_rate": 4.110271120406506e-07, "loss": 6.9687, "step": 37670 }, { "epoch": 0.023455233291298866, "grad_norm": 0.06321393698453903, "learning_rate": 4.0690971444794724e-07, "loss": 6.9676, "step": 37680 }, { "epoch": 0.023707440100882726, "grad_norm": 0.05689220875501633, "learning_rate": 4.028129026659566e-07, "loss": 6.973, "step": 37690 }, { "epoch": 0.02395964691046658, "grad_norm": 0.06648088246583939, "learning_rate": 3.9873667954454907e-07, "loss": 6.9676, "step": 37700 }, { "epoch": 0.02421185372005044, "grad_norm": 0.06802503019571304, "learning_rate": 3.946810479192564e-07, "loss": 6.9809, "step": 37710 }, { "epoch": 0.0244640605296343, "grad_norm": 0.06857651472091675, "learning_rate": 3.906460106113019e-07, "loss": 6.9801, "step": 37720 }, { "epoch": 0.02471626733921816, "grad_norm": 0.058548398315906525, "learning_rate": 3.866315704275636e-07, "loss": 6.9766, "step": 37730 }, { "epoch": 0.024968474148802017, "grad_norm": 0.05840570107102394, "learning_rate": 3.82637730160611e-07, "loss": 6.9737, "step": 37740 }, { "epoch": 0.025220680958385876, "grad_norm": 0.05120199918746948, "learning_rate": 3.7866449258867177e-07, "loss": 6.9741, "step": 37750 }, { "epoch": 0.025472887767969736, "grad_norm": 0.05803387984633446, "learning_rate": 3.7471186047564163e-07, "loss": 6.9731, "step": 37760 }, { "epoch": 0.025725094577553596, "grad_norm": 0.050918277353048325, "learning_rate": 3.707798365710913e-07, "loss": 6.9732, "step": 37770 }, { "epoch": 0.025977301387137452, "grad_norm": 0.05006423220038414, "learning_rate": 3.668684236102526e-07, "loss": 6.9746, "step": 37780 }, { "epoch": 0.02622950819672131, "grad_norm": 0.047100987285375595, "learning_rate": 3.629776243140093e-07, "loss": 6.9639, "step": 37790 }, { "epoch": 0.02648171500630517, "grad_norm": 0.060088641941547394, "learning_rate": 3.591074413889195e-07, "loss": 6.96, "step": 37800 }, { "epoch": 0.02673392181588903, "grad_norm": 0.0671517550945282, "learning_rate": 3.552578775271931e-07, "loss": 6.9762, "step": 37810 }, { "epoch": 0.026986128625472887, "grad_norm": 0.06383860856294632, "learning_rate": 3.5142893540669797e-07, "loss": 6.9788, "step": 37820 }, { "epoch": 0.027238335435056747, "grad_norm": 0.04723281040787697, "learning_rate": 3.476206176909602e-07, "loss": 6.9743, "step": 37830 }, { "epoch": 0.027490542244640606, "grad_norm": 0.061877910047769547, "learning_rate": 3.438329270291507e-07, "loss": 6.9698, "step": 37840 }, { "epoch": 0.027742749054224466, "grad_norm": 0.045353833585977554, "learning_rate": 3.400658660561018e-07, "loss": 6.9699, "step": 37850 }, { "epoch": 0.027994955863808322, "grad_norm": 0.06144651025533676, "learning_rate": 3.3631943739228733e-07, "loss": 6.9722, "step": 37860 }, { "epoch": 0.02824716267339218, "grad_norm": 0.059957198798656464, "learning_rate": 3.325936436438293e-07, "loss": 6.9739, "step": 37870 }, { "epoch": 0.02849936948297604, "grad_norm": 0.0646204873919487, "learning_rate": 3.2888848740250444e-07, "loss": 6.9781, "step": 37880 }, { "epoch": 0.0287515762925599, "grad_norm": 0.06092050299048424, "learning_rate": 3.2520397124572445e-07, "loss": 6.9795, "step": 37890 }, { "epoch": 0.029003783102143757, "grad_norm": 0.025351431220769882, "learning_rate": 3.2154009773654236e-07, "loss": 6.9702, "step": 37900 }, { "epoch": 0.029255989911727617, "grad_norm": 0.04319334775209427, "learning_rate": 3.178968694236528e-07, "loss": 6.9559, "step": 37910 }, { "epoch": 0.029508196721311476, "grad_norm": 0.04704036936163902, "learning_rate": 3.1427428884139855e-07, "loss": 6.9635, "step": 37920 }, { "epoch": 0.029760403530895336, "grad_norm": 0.054643455892801285, "learning_rate": 3.1067235850974373e-07, "loss": 6.9741, "step": 37930 }, { "epoch": 0.030012610340479192, "grad_norm": 0.06774044036865234, "learning_rate": 3.0709108093430416e-07, "loss": 6.9774, "step": 37940 }, { "epoch": 0.03026481715006305, "grad_norm": 0.06425216048955917, "learning_rate": 3.0353045860631035e-07, "loss": 6.9803, "step": 37950 }, { "epoch": 0.03051702395964691, "grad_norm": 0.07147163152694702, "learning_rate": 2.9999049400264435e-07, "loss": 6.9826, "step": 37960 }, { "epoch": 0.03076923076923077, "grad_norm": 0.06686747819185257, "learning_rate": 2.964711895857963e-07, "loss": 6.9806, "step": 37970 }, { "epoch": 0.031021437578814627, "grad_norm": 0.0505274198949337, "learning_rate": 2.9297254780390805e-07, "loss": 6.9734, "step": 37980 }, { "epoch": 0.03127364438839849, "grad_norm": 0.051951803267002106, "learning_rate": 2.894945710907326e-07, "loss": 6.9724, "step": 37990 }, { "epoch": 0.031525851197982346, "grad_norm": 0.058865584433078766, "learning_rate": 2.8603726186564814e-07, "loss": 6.9746, "step": 38000 }, { "epoch": 0.0317780580075662, "grad_norm": 0.06304717808961868, "learning_rate": 2.8294335625342227e-07, "loss": 6.9761, "step": 38010 }, { "epoch": 0.032030264817150066, "grad_norm": 0.06080644577741623, "learning_rate": 2.7952532186961366e-07, "loss": 6.9771, "step": 38020 }, { "epoch": 0.03228247162673392, "grad_norm": 0.06539413332939148, "learning_rate": 2.7612796190879863e-07, "loss": 6.9791, "step": 38030 }, { "epoch": 0.03253467843631778, "grad_norm": 0.06040528789162636, "learning_rate": 2.7275127873427675e-07, "loss": 6.98, "step": 38040 }, { "epoch": 0.03278688524590164, "grad_norm": 0.05012524500489235, "learning_rate": 2.6939527469496907e-07, "loss": 6.9746, "step": 38050 }, { "epoch": 0.0330390920554855, "grad_norm": 0.06169480085372925, "learning_rate": 2.660599521254048e-07, "loss": 6.9749, "step": 38060 }, { "epoch": 0.03329129886506936, "grad_norm": 0.06259097903966904, "learning_rate": 2.6274531334573136e-07, "loss": 6.9762, "step": 38070 }, { "epoch": 0.033543505674653216, "grad_norm": 0.06564566493034363, "learning_rate": 2.594513606617077e-07, "loss": 6.9779, "step": 38080 }, { "epoch": 0.03379571248423707, "grad_norm": 0.07058677077293396, "learning_rate": 2.5617809636470426e-07, "loss": 6.9787, "step": 38090 }, { "epoch": 0.034047919293820936, "grad_norm": 0.06854142993688583, "learning_rate": 2.529255227317029e-07, "loss": 6.9794, "step": 38100 }, { "epoch": 0.03430012610340479, "grad_norm": 0.06674086302518845, "learning_rate": 2.496936420252804e-07, "loss": 6.9794, "step": 38110 }, { "epoch": 0.03455233291298865, "grad_norm": 0.041453007608652115, "learning_rate": 2.4648245649362855e-07, "loss": 6.9675, "step": 38120 }, { "epoch": 0.03480453972257251, "grad_norm": 0.06895328313112259, "learning_rate": 2.4329196837054033e-07, "loss": 6.9706, "step": 38130 }, { "epoch": 0.03505674653215637, "grad_norm": 0.06026417389512062, "learning_rate": 2.401221798754172e-07, "loss": 6.9788, "step": 38140 }, { "epoch": 0.03530895334174023, "grad_norm": 0.06009239703416824, "learning_rate": 2.3697309321325188e-07, "loss": 6.9787, "step": 38150 }, { "epoch": 0.03556116015132409, "grad_norm": 0.05900699272751808, "learning_rate": 2.3384471057463885e-07, "loss": 6.9767, "step": 38160 }, { "epoch": 0.03581336696090794, "grad_norm": 0.06422964483499527, "learning_rate": 2.3073703413577396e-07, "loss": 6.979, "step": 38170 }, { "epoch": 0.036065573770491806, "grad_norm": 0.06818068772554398, "learning_rate": 2.2765006605844462e-07, "loss": 6.9813, "step": 38180 }, { "epoch": 0.03631778058007566, "grad_norm": 0.06442224979400635, "learning_rate": 2.2458380849003645e-07, "loss": 6.9809, "step": 38190 }, { "epoch": 0.03656998738965952, "grad_norm": 0.06225878745317459, "learning_rate": 2.2153826356352992e-07, "loss": 6.9781, "step": 38200 }, { "epoch": 0.03682219419924338, "grad_norm": 0.048866305500268936, "learning_rate": 2.185134333974903e-07, "loss": 6.9742, "step": 38210 }, { "epoch": 0.03707440100882724, "grad_norm": 0.04309205710887909, "learning_rate": 2.1550932009607783e-07, "loss": 6.9679, "step": 38220 }, { "epoch": 0.0373266078184111, "grad_norm": 0.05242859944701195, "learning_rate": 2.1252592574904418e-07, "loss": 6.9641, "step": 38230 }, { "epoch": 0.03757881462799496, "grad_norm": 0.05874130129814148, "learning_rate": 2.0956325243171924e-07, "loss": 6.9716, "step": 38240 }, { "epoch": 0.03783102143757881, "grad_norm": 0.05214005336165428, "learning_rate": 2.0662130220502784e-07, "loss": 6.9704, "step": 38250 }, { "epoch": 0.038083228247162676, "grad_norm": 0.06303450465202332, "learning_rate": 2.0370007711547623e-07, "loss": 6.973, "step": 38260 }, { "epoch": 0.03833543505674653, "grad_norm": 0.06636320799589157, "learning_rate": 2.0079957919515224e-07, "loss": 6.9765, "step": 38270 }, { "epoch": 0.03858764186633039, "grad_norm": 0.060610294342041016, "learning_rate": 1.9791981046172524e-07, "loss": 6.9764, "step": 38280 }, { "epoch": 0.03883984867591425, "grad_norm": 0.014511624351143837, "learning_rate": 1.9506077291844616e-07, "loss": 6.9547, "step": 38290 }, { "epoch": 0.03909205548549811, "grad_norm": 0.03928552195429802, "learning_rate": 1.9222246855414406e-07, "loss": 6.9573, "step": 38300 }, { "epoch": 0.03934426229508197, "grad_norm": 0.06018757075071335, "learning_rate": 1.8940489934322958e-07, "loss": 6.9645, "step": 38310 }, { "epoch": 0.03959646910466583, "grad_norm": 0.05963752418756485, "learning_rate": 1.866080672456816e-07, "loss": 6.9741, "step": 38320 }, { "epoch": 0.03984867591424968, "grad_norm": 0.057792775332927704, "learning_rate": 1.8383197420705378e-07, "loss": 6.9757, "step": 38330 }, { "epoch": 0.040100882723833546, "grad_norm": 0.05642220005393028, "learning_rate": 1.8107662215848474e-07, "loss": 6.9777, "step": 38340 }, { "epoch": 0.0403530895334174, "grad_norm": 0.06158899888396263, "learning_rate": 1.7834201301667463e-07, "loss": 6.9768, "step": 38350 }, { "epoch": 0.04060529634300126, "grad_norm": 0.06496822088956833, "learning_rate": 1.756281486838951e-07, "loss": 6.9781, "step": 38360 }, { "epoch": 0.04085750315258512, "grad_norm": 0.06589928269386292, "learning_rate": 1.7293503104798935e-07, "loss": 6.9758, "step": 38370 }, { "epoch": 0.04110970996216898, "grad_norm": 0.05866653472185135, "learning_rate": 1.7026266198236884e-07, "loss": 6.9769, "step": 38380 }, { "epoch": 0.04136191677175284, "grad_norm": 0.06657690554857254, "learning_rate": 1.6761104334600653e-07, "loss": 6.9816, "step": 38390 }, { "epoch": 0.0416141235813367, "grad_norm": 0.04495171457529068, "learning_rate": 1.6498017698345357e-07, "loss": 6.9546, "step": 38400 }, { "epoch": 0.04186633039092055, "grad_norm": 0.04388529434800148, "learning_rate": 1.623700647248094e-07, "loss": 6.9574, "step": 38410 }, { "epoch": 0.042118537200504416, "grad_norm": 0.06284046918153763, "learning_rate": 1.5978070838574833e-07, "loss": 6.9761, "step": 38420 }, { "epoch": 0.04237074401008827, "grad_norm": 0.05682659521698952, "learning_rate": 1.5721210976749946e-07, "loss": 6.9759, "step": 38430 }, { "epoch": 0.04262295081967213, "grad_norm": 0.0476640909910202, "learning_rate": 1.5466427065685352e-07, "loss": 6.9726, "step": 38440 }, { "epoch": 0.04287515762925599, "grad_norm": 0.05472567304968834, "learning_rate": 1.5213719282616279e-07, "loss": 6.9728, "step": 38450 }, { "epoch": 0.04312736443883985, "grad_norm": 0.0584959052503109, "learning_rate": 1.496308780333344e-07, "loss": 6.9768, "step": 38460 }, { "epoch": 0.04337957124842371, "grad_norm": 0.06206369772553444, "learning_rate": 1.471453280218371e-07, "loss": 6.9758, "step": 38470 }, { "epoch": 0.04363177805800757, "grad_norm": 0.05418163537979126, "learning_rate": 1.4468054452068448e-07, "loss": 6.9717, "step": 38480 }, { "epoch": 0.04388398486759142, "grad_norm": 0.05415283888578415, "learning_rate": 1.4223652924445828e-07, "loss": 6.974, "step": 38490 }, { "epoch": 0.044136191677175286, "grad_norm": 0.06218010187149048, "learning_rate": 1.3981328389328196e-07, "loss": 6.9744, "step": 38500 }, { "epoch": 0.04438839848675914, "grad_norm": 0.06850805133581161, "learning_rate": 1.374108101528404e-07, "loss": 6.9787, "step": 38510 }, { "epoch": 0.044640605296343, "grad_norm": 0.06284654140472412, "learning_rate": 1.3502910969435678e-07, "loss": 6.9763, "step": 38520 }, { "epoch": 0.04489281210592686, "grad_norm": 0.06022137776017189, "learning_rate": 1.326681841746191e-07, "loss": 6.974, "step": 38530 }, { "epoch": 0.04514501891551072, "grad_norm": 0.057214852422475815, "learning_rate": 1.3032803523594706e-07, "loss": 6.972, "step": 38540 }, { "epoch": 0.04539722572509458, "grad_norm": 0.05266409367322922, "learning_rate": 1.280086645062184e-07, "loss": 6.9705, "step": 38550 }, { "epoch": 0.04564943253467844, "grad_norm": 0.061497513204813004, "learning_rate": 1.257100735988592e-07, "loss": 6.9703, "step": 38560 }, { "epoch": 0.04590163934426229, "grad_norm": 0.06406184285879135, "learning_rate": 1.234322641128338e-07, "loss": 6.9767, "step": 38570 }, { "epoch": 0.046153846153846156, "grad_norm": 0.06429808586835861, "learning_rate": 1.2117523763265138e-07, "loss": 6.9774, "step": 38580 }, { "epoch": 0.04640605296343001, "grad_norm": 0.06874383240938187, "learning_rate": 1.1893899572836264e-07, "loss": 6.9799, "step": 38590 }, { "epoch": 0.04665825977301387, "grad_norm": 0.062087446451187134, "learning_rate": 1.1672353995556328e-07, "loss": 6.9777, "step": 38600 }, { "epoch": 0.04691046658259773, "grad_norm": 0.06574809551239014, "learning_rate": 1.1452887185539052e-07, "loss": 6.9813, "step": 38610 }, { "epoch": 0.04716267339218159, "grad_norm": 0.061682138592004776, "learning_rate": 1.1235499295451979e-07, "loss": 6.9789, "step": 38620 }, { "epoch": 0.04741488020176545, "grad_norm": 0.06054854765534401, "learning_rate": 1.1020190476516478e-07, "loss": 6.9762, "step": 38630 }, { "epoch": 0.04766708701134931, "grad_norm": 0.05454283207654953, "learning_rate": 1.0806960878507077e-07, "loss": 6.9769, "step": 38640 }, { "epoch": 0.04791929382093316, "grad_norm": 0.048773445188999176, "learning_rate": 1.0595810649753123e-07, "loss": 6.969, "step": 38650 }, { "epoch": 0.048171500630517027, "grad_norm": 0.05642795190215111, "learning_rate": 1.0386739937136124e-07, "loss": 6.9648, "step": 38660 }, { "epoch": 0.04842370744010088, "grad_norm": 0.06313741207122803, "learning_rate": 1.0179748886092077e-07, "loss": 6.9776, "step": 38670 }, { "epoch": 0.04867591424968474, "grad_norm": 0.060432080179452896, "learning_rate": 9.974837640610135e-08, "loss": 6.9761, "step": 38680 }, { "epoch": 0.0489281210592686, "grad_norm": 0.05790164694190025, "learning_rate": 9.772006343232609e-08, "loss": 6.9728, "step": 38690 }, { "epoch": 0.04918032786885246, "grad_norm": 0.06714337319135666, "learning_rate": 9.571255135053969e-08, "loss": 6.9721, "step": 38700 }, { "epoch": 0.04943253467843632, "grad_norm": 0.06441647559404373, "learning_rate": 9.372584155723507e-08, "loss": 6.978, "step": 38710 }, { "epoch": 0.04968474148802018, "grad_norm": 0.0618634894490242, "learning_rate": 9.175993543442008e-08, "loss": 6.9774, "step": 38720 }, { "epoch": 0.049936948297604034, "grad_norm": 0.06327604502439499, "learning_rate": 8.981483434963745e-08, "loss": 6.9783, "step": 38730 }, { "epoch": 0.0501891551071879, "grad_norm": 0.06546623259782791, "learning_rate": 8.789053965595483e-08, "loss": 6.9802, "step": 38740 }, { "epoch": 0.05044136191677175, "grad_norm": 0.049919016659259796, "learning_rate": 8.598705269197149e-08, "loss": 6.9616, "step": 38750 }, { "epoch": 0.05069356872635561, "grad_norm": 0.04772001504898071, "learning_rate": 8.410437478180155e-08, "loss": 6.9611, "step": 38760 }, { "epoch": 0.05094577553593947, "grad_norm": 0.0631062462925911, "learning_rate": 8.224250723509408e-08, "loss": 6.9747, "step": 38770 }, { "epoch": 0.05119798234552333, "grad_norm": 0.041697416454553604, "learning_rate": 8.040145134701638e-08, "loss": 6.9719, "step": 38780 }, { "epoch": 0.05145018915510719, "grad_norm": 0.060652051120996475, "learning_rate": 7.8581208398264e-08, "loss": 6.9727, "step": 38790 }, { "epoch": 0.05170239596469105, "grad_norm": 0.03843947872519493, "learning_rate": 7.67817796550474e-08, "loss": 6.9642, "step": 38800 }, { "epoch": 0.051954602774274904, "grad_norm": 0.04584541544318199, "learning_rate": 7.500316636910199e-08, "loss": 6.9575, "step": 38810 }, { "epoch": 0.05220680958385877, "grad_norm": 0.04371940717101097, "learning_rate": 7.324536977768136e-08, "loss": 6.9619, "step": 38820 }, { "epoch": 0.05245901639344262, "grad_norm": 0.06697985529899597, "learning_rate": 7.150839110356077e-08, "loss": 6.977, "step": 38830 }, { "epoch": 0.05271122320302648, "grad_norm": 0.06289415806531906, "learning_rate": 6.9792231555037e-08, "loss": 6.9796, "step": 38840 }, { "epoch": 0.05296343001261034, "grad_norm": 0.0650937557220459, "learning_rate": 6.809689232591176e-08, "loss": 6.9794, "step": 38850 }, { "epoch": 0.0532156368221942, "grad_norm": 0.06611385941505432, "learning_rate": 6.642237459551836e-08, "loss": 6.9799, "step": 38860 }, { "epoch": 0.05346784363177806, "grad_norm": 0.05430043488740921, "learning_rate": 6.476867952869835e-08, "loss": 6.9784, "step": 38870 }, { "epoch": 0.05372005044136192, "grad_norm": 0.05929255485534668, "learning_rate": 6.313580827581155e-08, "loss": 6.9744, "step": 38880 }, { "epoch": 0.053972257250945774, "grad_norm": 0.04991510510444641, "learning_rate": 6.152376197272935e-08, "loss": 6.9741, "step": 38890 }, { "epoch": 0.05422446406052964, "grad_norm": 0.056385286152362823, "learning_rate": 5.993254174083474e-08, "loss": 6.9746, "step": 38900 }, { "epoch": 0.05447667087011349, "grad_norm": 0.05884915962815285, "learning_rate": 5.8362148687032266e-08, "loss": 6.9731, "step": 38910 }, { "epoch": 0.05472887767969735, "grad_norm": 0.05622541531920433, "learning_rate": 5.681258390372812e-08, "loss": 6.9749, "step": 38920 }, { "epoch": 0.05498108448928121, "grad_norm": 0.058649156242609024, "learning_rate": 5.5283848468843376e-08, "loss": 6.9723, "step": 38930 }, { "epoch": 0.05523329129886507, "grad_norm": 0.06085497885942459, "learning_rate": 5.377594344581738e-08, "loss": 6.9731, "step": 38940 }, { "epoch": 0.05548549810844893, "grad_norm": 0.05194535851478577, "learning_rate": 5.228886988358772e-08, "loss": 6.9622, "step": 38950 }, { "epoch": 0.05573770491803279, "grad_norm": 0.050249189138412476, "learning_rate": 5.082262881660693e-08, "loss": 6.9726, "step": 38960 }, { "epoch": 0.055989911727616644, "grad_norm": 0.042298704385757446, "learning_rate": 4.937722126483246e-08, "loss": 6.9623, "step": 38970 }, { "epoch": 0.05624211853720051, "grad_norm": 0.04956170544028282, "learning_rate": 4.7952648233733355e-08, "loss": 6.9611, "step": 38980 }, { "epoch": 0.05649432534678436, "grad_norm": 0.06304704397916794, "learning_rate": 4.654891071428691e-08, "loss": 6.9754, "step": 38990 }, { "epoch": 0.05674653215636822, "grad_norm": 0.0671352669596672, "learning_rate": 4.516600968296869e-08, "loss": 6.9801, "step": 39000 }, { "epoch": 0.05699873896595208, "grad_norm": 0.06367508322000504, "learning_rate": 4.3803946101769185e-08, "loss": 6.9772, "step": 39010 }, { "epoch": 0.05725094577553594, "grad_norm": 0.06574852019548416, "learning_rate": 4.2462720918177155e-08, "loss": 6.98, "step": 39020 }, { "epoch": 0.0575031525851198, "grad_norm": 0.06663752347230911, "learning_rate": 4.1142335065186276e-08, "loss": 6.9812, "step": 39030 }, { "epoch": 0.05775535939470366, "grad_norm": 0.06845145672559738, "learning_rate": 3.984278946129516e-08, "loss": 6.9812, "step": 39040 }, { "epoch": 0.058007566204287514, "grad_norm": 0.06344448775053024, "learning_rate": 3.856408501051068e-08, "loss": 6.9778, "step": 39050 }, { "epoch": 0.05825977301387138, "grad_norm": 0.06409602612257004, "learning_rate": 3.730622260232797e-08, "loss": 6.976, "step": 39060 }, { "epoch": 0.05851197982345523, "grad_norm": 0.06244668737053871, "learning_rate": 3.606920311176043e-08, "loss": 6.9775, "step": 39070 }, { "epoch": 0.05876418663303909, "grad_norm": 0.06641269475221634, "learning_rate": 3.4853027399309734e-08, "loss": 6.9759, "step": 39080 }, { "epoch": 0.05901639344262295, "grad_norm": 0.05745202675461769, "learning_rate": 3.365769631098581e-08, "loss": 6.9749, "step": 39090 }, { "epoch": 0.05926860025220681, "grad_norm": 0.0630771741271019, "learning_rate": 3.2483210678293517e-08, "loss": 6.9779, "step": 39100 }, { "epoch": 0.05952080706179067, "grad_norm": 0.024052657186985016, "learning_rate": 3.1329571318242655e-08, "loss": 6.971, "step": 39110 }, { "epoch": 0.05977301387137453, "grad_norm": 0.06401780247688293, "learning_rate": 3.019677903333462e-08, "loss": 6.958, "step": 39120 }, { "epoch": 0.060025220680958384, "grad_norm": 0.06323450058698654, "learning_rate": 2.908483461156908e-08, "loss": 6.9763, "step": 39130 }, { "epoch": 0.06027742749054225, "grad_norm": 0.049276337027549744, "learning_rate": 2.799373882645728e-08, "loss": 6.9692, "step": 39140 }, { "epoch": 0.0605296343001261, "grad_norm": 0.047329071909189224, "learning_rate": 2.692349243698877e-08, "loss": 6.9635, "step": 39150 }, { "epoch": 0.06078184110970996, "grad_norm": 0.049370016902685165, "learning_rate": 2.5874096187658013e-08, "loss": 6.9644, "step": 39160 }, { "epoch": 0.06103404791929382, "grad_norm": 0.05196312442421913, "learning_rate": 2.484555080845774e-08, "loss": 6.9698, "step": 39170 }, { "epoch": 0.06128625472887768, "grad_norm": 0.0542772077023983, "learning_rate": 2.3837857014875618e-08, "loss": 6.9691, "step": 39180 }, { "epoch": 0.06153846153846154, "grad_norm": 0.053385283797979355, "learning_rate": 2.2851015507890926e-08, "loss": 6.9692, "step": 39190 }, { "epoch": 0.0617906683480454, "grad_norm": 0.06072819605469704, "learning_rate": 2.1885026973977873e-08, "loss": 6.9732, "step": 39200 }, { "epoch": 0.062042875157629254, "grad_norm": 0.05676225572824478, "learning_rate": 2.0939892085108937e-08, "loss": 6.9738, "step": 39210 }, { "epoch": 0.06229508196721312, "grad_norm": 0.058675315231084824, "learning_rate": 2.0015611498741536e-08, "loss": 6.9747, "step": 39220 }, { "epoch": 0.06254728877679698, "grad_norm": 0.06321834772825241, "learning_rate": 1.9112185857841358e-08, "loss": 6.976, "step": 39230 }, { "epoch": 0.06279949558638083, "grad_norm": 0.06990792602300644, "learning_rate": 1.822961579084903e-08, "loss": 6.9783, "step": 39240 }, { "epoch": 0.06305170239596469, "grad_norm": 0.06044134497642517, "learning_rate": 1.7367901911710115e-08, "loss": 6.9772, "step": 39250 }, { "epoch": 0.06330390920554856, "grad_norm": 0.05692950263619423, "learning_rate": 1.6527044819855118e-08, "loss": 6.9741, "step": 39260 }, { "epoch": 0.0635561160151324, "grad_norm": 0.062203582376241684, "learning_rate": 1.5707045100209484e-08, "loss": 6.9748, "step": 39270 }, { "epoch": 0.06380832282471627, "grad_norm": 0.06330915540456772, "learning_rate": 1.4907903323190254e-08, "loss": 6.9774, "step": 39280 }, { "epoch": 0.06406052963430013, "grad_norm": 0.06159600988030434, "learning_rate": 1.412962004470275e-08, "loss": 6.9787, "step": 39290 }, { "epoch": 0.06431273644388398, "grad_norm": 0.05570777878165245, "learning_rate": 1.3372195806143906e-08, "loss": 6.9722, "step": 39300 }, { "epoch": 0.06456494325346784, "grad_norm": 0.05759734287858009, "learning_rate": 1.2635631134398918e-08, "loss": 6.9731, "step": 39310 }, { "epoch": 0.0648171500630517, "grad_norm": 0.06553798913955688, "learning_rate": 1.1919926541844594e-08, "loss": 6.9779, "step": 39320 }, { "epoch": 0.06506935687263556, "grad_norm": 0.06440369039773941, "learning_rate": 1.122508252634269e-08, "loss": 6.9799, "step": 39330 }, { "epoch": 0.06532156368221942, "grad_norm": 0.06663090735673904, "learning_rate": 1.055109957125322e-08, "loss": 6.9783, "step": 39340 }, { "epoch": 0.06557377049180328, "grad_norm": 0.05232470855116844, "learning_rate": 9.897978145411157e-09, "loss": 6.9751, "step": 39350 }, { "epoch": 0.06582597730138713, "grad_norm": 0.06039911136031151, "learning_rate": 9.26571870314974e-09, "loss": 6.9769, "step": 39360 }, { "epoch": 0.066078184110971, "grad_norm": 0.05627429857850075, "learning_rate": 8.654321684287148e-09, "loss": 6.9764, "step": 39370 }, { "epoch": 0.06633039092055486, "grad_norm": 0.06161791458725929, "learning_rate": 8.063787514129838e-09, "loss": 6.9749, "step": 39380 }, { "epoch": 0.06658259773013872, "grad_norm": 0.05330139398574829, "learning_rate": 7.494116603469214e-09, "loss": 6.974, "step": 39390 }, { "epoch": 0.06683480453972257, "grad_norm": 0.06064677610993385, "learning_rate": 6.945309348584949e-09, "loss": 6.9755, "step": 39400 }, { "epoch": 0.06708701134930643, "grad_norm": 0.05734526738524437, "learning_rate": 6.4173661312449946e-09, "loss": 6.9758, "step": 39410 }, { "epoch": 0.0673392181588903, "grad_norm": 0.03940802440047264, "learning_rate": 5.910287318695584e-09, "loss": 6.9748, "step": 39420 }, { "epoch": 0.06759142496847415, "grad_norm": 0.05531793460249901, "learning_rate": 5.424073263684548e-09, "loss": 6.9692, "step": 39430 }, { "epoch": 0.06784363177805801, "grad_norm": 0.05530742183327675, "learning_rate": 4.9587243044313394e-09, "loss": 6.9735, "step": 39440 }, { "epoch": 0.06809583858764187, "grad_norm": 0.034060437232255936, "learning_rate": 4.514240764643685e-09, "loss": 6.9555, "step": 39450 }, { "epoch": 0.06834804539722572, "grad_norm": 0.03375169262290001, "learning_rate": 4.0906229535242475e-09, "loss": 6.9564, "step": 39460 }, { "epoch": 0.06860025220680958, "grad_norm": 0.04518752917647362, "learning_rate": 3.687871165747314e-09, "loss": 6.9526, "step": 39470 }, { "epoch": 0.06885245901639345, "grad_norm": 0.05220139026641846, "learning_rate": 3.3059856814821044e-09, "loss": 6.968, "step": 39480 }, { "epoch": 0.0691046658259773, "grad_norm": 0.059002913534641266, "learning_rate": 2.944966766379453e-09, "loss": 6.9719, "step": 39490 }, { "epoch": 0.06935687263556116, "grad_norm": 0.05503362789750099, "learning_rate": 2.604814671571809e-09, "loss": 6.9744, "step": 39500 }, { "epoch": 0.06960907944514502, "grad_norm": 0.05822187662124634, "learning_rate": 2.2855296336832254e-09, "loss": 6.972, "step": 39510 }, { "epoch": 0.06986128625472887, "grad_norm": 0.05547050014138222, "learning_rate": 1.987111874812708e-09, "loss": 6.9714, "step": 39520 }, { "epoch": 0.07011349306431273, "grad_norm": 0.06606748700141907, "learning_rate": 1.7095616025541995e-09, "loss": 6.9763, "step": 39530 }, { "epoch": 0.0703656998738966, "grad_norm": 0.05147161707282066, "learning_rate": 1.4528790099732624e-09, "loss": 6.9739, "step": 39540 }, { "epoch": 0.07061790668348046, "grad_norm": 0.05669853463768959, "learning_rate": 1.2170642756270667e-09, "loss": 6.9688, "step": 39550 }, { "epoch": 0.07087011349306431, "grad_norm": 0.05581197887659073, "learning_rate": 1.002117563557725e-09, "loss": 6.9724, "step": 39560 }, { "epoch": 0.07112232030264817, "grad_norm": 0.06177473068237305, "learning_rate": 8.080390232889645e-10, "loss": 6.9754, "step": 39570 }, { "epoch": 0.07137452711223204, "grad_norm": 0.06336364895105362, "learning_rate": 6.348287898261251e-10, "loss": 6.975, "step": 39580 }, { "epoch": 0.07162673392181589, "grad_norm": 0.053177691996097565, "learning_rate": 4.824869836561607e-10, "loss": 6.974, "step": 39590 }, { "epoch": 0.07187894073139975, "grad_norm": 0.05643773451447487, "learning_rate": 3.5101371075763054e-10, "loss": 6.9743, "step": 39600 }, { "epoch": 0.07213114754098361, "grad_norm": 0.05750450864434242, "learning_rate": 2.4040906258404606e-10, "loss": 6.9762, "step": 39610 }, { "epoch": 0.07238335435056746, "grad_norm": 0.050427939742803574, "learning_rate": 1.506731160738628e-10, "loss": 6.9735, "step": 39620 }, { "epoch": 0.07263556116015132, "grad_norm": 0.060212425887584686, "learning_rate": 8.180593365381128e-11, "loss": 6.9749, "step": 39630 }, { "epoch": 0.07288776796973519, "grad_norm": 0.054166074842214584, "learning_rate": 3.380756322890477e-11, "loss": 6.9701, "step": 39640 }, { "epoch": 0.07313997477931904, "grad_norm": 0.05737795680761337, "learning_rate": 6.678038189100733e-12, "loss": 6.9677, "step": 39650 } ], "logging_steps": 10, "max_steps": 39650, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 128, "trial_name": null, "trial_params": null }