| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 50, |
| "global_step": 2488, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0020096463022508037, |
| "grad_norm": 25.43042279854588, |
| "learning_rate": 1.6064257028112448e-07, |
| "loss": 1.5516, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0040192926045016075, |
| "grad_norm": 32.55468907279309, |
| "learning_rate": 3.614457831325301e-07, |
| "loss": 1.6285, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.006028938906752411, |
| "grad_norm": 22.457510712636555, |
| "learning_rate": 5.622489959839358e-07, |
| "loss": 1.544, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.008038585209003215, |
| "grad_norm": 12.994153155174507, |
| "learning_rate": 7.630522088353415e-07, |
| "loss": 1.4871, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01004823151125402, |
| "grad_norm": 13.10275994035317, |
| "learning_rate": 9.638554216867472e-07, |
| "loss": 1.3155, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.012057877813504822, |
| "grad_norm": 9.886808975385357, |
| "learning_rate": 1.1646586345381528e-06, |
| "loss": 1.2181, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.014067524115755627, |
| "grad_norm": 9.765195243051854, |
| "learning_rate": 1.3654618473895584e-06, |
| "loss": 1.1915, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01607717041800643, |
| "grad_norm": 10.076388693447448, |
| "learning_rate": 1.566265060240964e-06, |
| "loss": 1.1387, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.018086816720257234, |
| "grad_norm": 9.74974655232714, |
| "learning_rate": 1.7670682730923696e-06, |
| "loss": 1.0759, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.02009646302250804, |
| "grad_norm": 8.925268910533704, |
| "learning_rate": 1.967871485943775e-06, |
| "loss": 1.0175, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.02009646302250804, |
| "eval_cooking_sharegpt_test_loss": 0.9884688854217529, |
| "eval_cooking_sharegpt_test_runtime": 29.6069, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.755, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.338, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.022106109324758844, |
| "grad_norm": 9.02573766663436, |
| "learning_rate": 2.168674698795181e-06, |
| "loss": 0.9542, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.024115755627009645, |
| "grad_norm": 9.819497852533047, |
| "learning_rate": 2.3694779116465868e-06, |
| "loss": 0.9784, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02612540192926045, |
| "grad_norm": 10.82311981416087, |
| "learning_rate": 2.5702811244979918e-06, |
| "loss": 1.0231, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.028135048231511254, |
| "grad_norm": 8.964625514542233, |
| "learning_rate": 2.771084337349398e-06, |
| "loss": 0.8573, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03014469453376206, |
| "grad_norm": 8.532301605077798, |
| "learning_rate": 2.9718875502008034e-06, |
| "loss": 0.9551, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.03215434083601286, |
| "grad_norm": 9.990828654438014, |
| "learning_rate": 3.172690763052209e-06, |
| "loss": 0.9182, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.034163987138263664, |
| "grad_norm": 6.995692527275145, |
| "learning_rate": 3.3734939759036146e-06, |
| "loss": 0.8639, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.03617363344051447, |
| "grad_norm": 7.768080152065188, |
| "learning_rate": 3.5742971887550204e-06, |
| "loss": 0.8521, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.03818327974276527, |
| "grad_norm": 8.012119974468852, |
| "learning_rate": 3.7751004016064258e-06, |
| "loss": 0.8477, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.04019292604501608, |
| "grad_norm": 8.50117811151367, |
| "learning_rate": 3.975903614457832e-06, |
| "loss": 0.8473, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04019292604501608, |
| "eval_cooking_sharegpt_test_loss": 0.8732408285140991, |
| "eval_cooking_sharegpt_test_runtime": 29.1007, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.873, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.344, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04220257234726688, |
| "grad_norm": 8.560443518575713, |
| "learning_rate": 4.176706827309237e-06, |
| "loss": 0.8652, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.04421221864951769, |
| "grad_norm": 9.719112811630923, |
| "learning_rate": 4.377510040160643e-06, |
| "loss": 0.8941, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.04622186495176849, |
| "grad_norm": 9.985676476471362, |
| "learning_rate": 4.578313253012049e-06, |
| "loss": 0.8859, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.04823151125401929, |
| "grad_norm": 8.414088670486853, |
| "learning_rate": 4.779116465863454e-06, |
| "loss": 0.8043, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.050241157556270094, |
| "grad_norm": 8.096501738165966, |
| "learning_rate": 4.979919678714859e-06, |
| "loss": 0.8565, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.0522508038585209, |
| "grad_norm": 9.410994081814192, |
| "learning_rate": 5.180722891566266e-06, |
| "loss": 0.9358, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.0542604501607717, |
| "grad_norm": 7.421809482089455, |
| "learning_rate": 5.381526104417672e-06, |
| "loss": 0.8336, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.05627009646302251, |
| "grad_norm": 8.907995665308611, |
| "learning_rate": 5.582329317269076e-06, |
| "loss": 0.8398, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.05827974276527331, |
| "grad_norm": 7.103996712375502, |
| "learning_rate": 5.783132530120482e-06, |
| "loss": 0.8702, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.06028938906752412, |
| "grad_norm": 8.485504235075577, |
| "learning_rate": 5.983935742971888e-06, |
| "loss": 0.8542, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.06028938906752412, |
| "eval_cooking_sharegpt_test_loss": 0.8469827175140381, |
| "eval_cooking_sharegpt_test_runtime": 29.1354, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.864, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.06229903536977492, |
| "grad_norm": 7.565513220130896, |
| "learning_rate": 6.184738955823294e-06, |
| "loss": 0.8558, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.06430868167202572, |
| "grad_norm": 9.7278611728726, |
| "learning_rate": 6.385542168674699e-06, |
| "loss": 0.9122, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.06631832797427653, |
| "grad_norm": 8.171433737702468, |
| "learning_rate": 6.586345381526105e-06, |
| "loss": 0.8225, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.06832797427652733, |
| "grad_norm": 7.424345320287168, |
| "learning_rate": 6.78714859437751e-06, |
| "loss": 0.838, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.07033762057877814, |
| "grad_norm": 6.778825035197842, |
| "learning_rate": 6.987951807228917e-06, |
| "loss": 0.7698, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.07234726688102894, |
| "grad_norm": 8.160040638569848, |
| "learning_rate": 7.188755020080321e-06, |
| "loss": 0.8443, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.07435691318327975, |
| "grad_norm": 7.56314606717551, |
| "learning_rate": 7.389558232931727e-06, |
| "loss": 0.7953, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.07636655948553055, |
| "grad_norm": 7.860496549752045, |
| "learning_rate": 7.590361445783133e-06, |
| "loss": 0.8839, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.07837620578778134, |
| "grad_norm": 6.887754234302554, |
| "learning_rate": 7.79116465863454e-06, |
| "loss": 0.796, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.08038585209003216, |
| "grad_norm": 7.785553062894794, |
| "learning_rate": 7.991967871485944e-06, |
| "loss": 0.8336, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.08038585209003216, |
| "eval_cooking_sharegpt_test_loss": 0.8259029984474182, |
| "eval_cooking_sharegpt_test_runtime": 29.1235, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.867, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.08239549839228295, |
| "grad_norm": 6.994614576677781, |
| "learning_rate": 8.19277108433735e-06, |
| "loss": 0.8181, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.08440514469453377, |
| "grad_norm": 7.772788858949606, |
| "learning_rate": 8.393574297188756e-06, |
| "loss": 0.8408, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.08641479099678456, |
| "grad_norm": 7.254386585395993, |
| "learning_rate": 8.594377510040161e-06, |
| "loss": 0.9085, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.08842443729903537, |
| "grad_norm": 7.026321018704356, |
| "learning_rate": 8.795180722891567e-06, |
| "loss": 0.8782, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.09043408360128617, |
| "grad_norm": 7.051507761624435, |
| "learning_rate": 8.995983935742972e-06, |
| "loss": 0.9409, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.09244372990353698, |
| "grad_norm": 7.606219516981015, |
| "learning_rate": 9.196787148594378e-06, |
| "loss": 0.8555, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.09445337620578778, |
| "grad_norm": 6.28124366103456, |
| "learning_rate": 9.397590361445785e-06, |
| "loss": 0.7534, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.09646302250803858, |
| "grad_norm": 8.394942968125275, |
| "learning_rate": 9.598393574297188e-06, |
| "loss": 0.8402, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.09847266881028939, |
| "grad_norm": 7.156441873531409, |
| "learning_rate": 9.799196787148595e-06, |
| "loss": 0.8081, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.10048231511254019, |
| "grad_norm": 7.327698916304687, |
| "learning_rate": 1e-05, |
| "loss": 0.7723, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.10048231511254019, |
| "eval_cooking_sharegpt_test_loss": 0.8293350338935852, |
| "eval_cooking_sharegpt_test_runtime": 29.0949, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.874, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.344, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.102491961414791, |
| "grad_norm": 7.4235500079351375, |
| "learning_rate": 9.999876953350016e-06, |
| "loss": 0.8151, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.1045016077170418, |
| "grad_norm": 7.084895816067402, |
| "learning_rate": 9.999507819456254e-06, |
| "loss": 0.7621, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.10651125401929261, |
| "grad_norm": 6.151331764138221, |
| "learning_rate": 9.998892616486991e-06, |
| "loss": 0.803, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.1085209003215434, |
| "grad_norm": 7.692005262695669, |
| "learning_rate": 9.99803137472169e-06, |
| "loss": 0.7968, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.11053054662379422, |
| "grad_norm": 7.296616213551366, |
| "learning_rate": 9.996924136549519e-06, |
| "loss": 0.8934, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.11254019292604502, |
| "grad_norm": 6.78581453909987, |
| "learning_rate": 9.995570956467257e-06, |
| "loss": 0.8168, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.11454983922829581, |
| "grad_norm": 6.629376557450756, |
| "learning_rate": 9.993971901076614e-06, |
| "loss": 0.8536, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.11655948553054662, |
| "grad_norm": 6.125893044121695, |
| "learning_rate": 9.992127049080952e-06, |
| "loss": 0.8304, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.11856913183279742, |
| "grad_norm": 6.3713359597576416, |
| "learning_rate": 9.990036491281418e-06, |
| "loss": 0.8069, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.12057877813504823, |
| "grad_norm": 6.375162182116364, |
| "learning_rate": 9.98770033057246e-06, |
| "loss": 0.8101, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.12057877813504823, |
| "eval_cooking_sharegpt_test_loss": 0.797538161277771, |
| "eval_cooking_sharegpt_test_runtime": 29.1177, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.869, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.12258842443729903, |
| "grad_norm": 7.814959047420182, |
| "learning_rate": 9.985118681936783e-06, |
| "loss": 0.8315, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.12459807073954984, |
| "grad_norm": 6.8494733103452194, |
| "learning_rate": 9.982291672439671e-06, |
| "loss": 0.7654, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.12660771704180065, |
| "grad_norm": 5.286224086045848, |
| "learning_rate": 9.979219441222743e-06, |
| "loss": 0.7776, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.12861736334405144, |
| "grad_norm": 6.103426753817979, |
| "learning_rate": 9.975902139497105e-06, |
| "loss": 0.815, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.13062700964630225, |
| "grad_norm": 5.520317998483916, |
| "learning_rate": 9.972339930535897e-06, |
| "loss": 0.813, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.13263665594855306, |
| "grad_norm": 5.9863235363550045, |
| "learning_rate": 9.968532989666277e-06, |
| "loss": 0.7504, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.13464630225080385, |
| "grad_norm": 7.212782518237145, |
| "learning_rate": 9.96448150426077e-06, |
| "loss": 0.8715, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.13665594855305466, |
| "grad_norm": 5.912590773720034, |
| "learning_rate": 9.96018567372806e-06, |
| "loss": 0.7558, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.13866559485530547, |
| "grad_norm": 5.77435813010867, |
| "learning_rate": 9.95564570950317e-06, |
| "loss": 0.779, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.14067524115755628, |
| "grad_norm": 6.505458706803495, |
| "learning_rate": 9.950861835037053e-06, |
| "loss": 0.8514, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.14067524115755628, |
| "eval_cooking_sharegpt_test_loss": 0.7728434205055237, |
| "eval_cooking_sharegpt_test_runtime": 29.1638, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.858, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.14268488745980706, |
| "grad_norm": 5.6452446665378115, |
| "learning_rate": 9.945834285785601e-06, |
| "loss": 0.6856, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.14469453376205788, |
| "grad_norm": 5.6265260781387365, |
| "learning_rate": 9.94056330919805e-06, |
| "loss": 0.8083, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.1467041800643087, |
| "grad_norm": 5.368904478416109, |
| "learning_rate": 9.935049164704809e-06, |
| "loss": 0.6928, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.1487138263665595, |
| "grad_norm": 7.881458451902342, |
| "learning_rate": 9.929292123704677e-06, |
| "loss": 0.7741, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.15072347266881028, |
| "grad_norm": 6.532061219960651, |
| "learning_rate": 9.923292469551498e-06, |
| "loss": 0.8097, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.1527331189710611, |
| "grad_norm": 6.450102699245797, |
| "learning_rate": 9.91705049754021e-06, |
| "loss": 0.8684, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.1547427652733119, |
| "grad_norm": 5.574142168212173, |
| "learning_rate": 9.910566514892311e-06, |
| "loss": 0.7809, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.1567524115755627, |
| "grad_norm": 6.795118484011259, |
| "learning_rate": 9.903840840740739e-06, |
| "loss": 0.8092, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.1587620578778135, |
| "grad_norm": 6.521136743595867, |
| "learning_rate": 9.896873806114164e-06, |
| "loss": 0.7888, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.1607717041800643, |
| "grad_norm": 6.632901741168955, |
| "learning_rate": 9.889665753920693e-06, |
| "loss": 0.7539, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1607717041800643, |
| "eval_cooking_sharegpt_test_loss": 0.7521212697029114, |
| "eval_cooking_sharegpt_test_runtime": 29.1522, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.861, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.16278135048231512, |
| "grad_norm": 5.843964500525819, |
| "learning_rate": 9.882217038930996e-06, |
| "loss": 0.7583, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.1647909967845659, |
| "grad_norm": 5.922756974819583, |
| "learning_rate": 9.874528027760844e-06, |
| "loss": 0.7904, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.16680064308681672, |
| "grad_norm": 5.184809873689736, |
| "learning_rate": 9.866599098853065e-06, |
| "loss": 0.6878, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.16881028938906753, |
| "grad_norm": 6.0072795392775875, |
| "learning_rate": 9.858430642458911e-06, |
| "loss": 0.7625, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.17081993569131831, |
| "grad_norm": 6.3898364197395505, |
| "learning_rate": 9.850023060618865e-06, |
| "loss": 0.8075, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.17282958199356913, |
| "grad_norm": 5.4615901982761725, |
| "learning_rate": 9.841376767142836e-06, |
| "loss": 0.7334, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.17483922829581994, |
| "grad_norm": 5.072280085120411, |
| "learning_rate": 9.832492187589803e-06, |
| "loss": 0.7006, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.17684887459807075, |
| "grad_norm": 4.902329337695089, |
| "learning_rate": 9.823369759246866e-06, |
| "loss": 0.7779, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.17885852090032153, |
| "grad_norm": 5.961804769639084, |
| "learning_rate": 9.814009931107724e-06, |
| "loss": 0.7983, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.18086816720257234, |
| "grad_norm": 4.8028511364670115, |
| "learning_rate": 9.804413163850578e-06, |
| "loss": 0.6964, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.18086816720257234, |
| "eval_cooking_sharegpt_test_loss": 0.7369500994682312, |
| "eval_cooking_sharegpt_test_runtime": 29.1063, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.871, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.344, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.18287781350482316, |
| "grad_norm": 4.8914718117824405, |
| "learning_rate": 9.79457992981545e-06, |
| "loss": 0.7367, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.18488745980707397, |
| "grad_norm": 5.402352390784586, |
| "learning_rate": 9.784510712980944e-06, |
| "loss": 0.6798, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.18689710610932475, |
| "grad_norm": 5.701737276259646, |
| "learning_rate": 9.774206008940418e-06, |
| "loss": 0.7226, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.18890675241157556, |
| "grad_norm": 6.263433603685716, |
| "learning_rate": 9.7636663248776e-06, |
| "loss": 0.8274, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.19091639871382637, |
| "grad_norm": 5.312503789863195, |
| "learning_rate": 9.75289217954161e-06, |
| "loss": 0.7734, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.19292604501607716, |
| "grad_norm": 5.156337860674348, |
| "learning_rate": 9.741884103221451e-06, |
| "loss": 0.7659, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.19493569131832797, |
| "grad_norm": 6.800003297542305, |
| "learning_rate": 9.730642637719884e-06, |
| "loss": 0.7985, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.19694533762057878, |
| "grad_norm": 5.800498897893185, |
| "learning_rate": 9.71916833632678e-06, |
| "loss": 0.7221, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.1989549839228296, |
| "grad_norm": 4.793819929790139, |
| "learning_rate": 9.707461763791879e-06, |
| "loss": 0.715, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.20096463022508038, |
| "grad_norm": 5.1410323292838065, |
| "learning_rate": 9.69552349629699e-06, |
| "loss": 0.7628, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.20096463022508038, |
| "eval_cooking_sharegpt_test_loss": 0.7155391573905945, |
| "eval_cooking_sharegpt_test_runtime": 29.1357, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.864, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2029742765273312, |
| "grad_norm": 4.650244956653454, |
| "learning_rate": 9.683354121427645e-06, |
| "loss": 0.6865, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.204983922829582, |
| "grad_norm": 5.005060659354274, |
| "learning_rate": 9.670954238144165e-06, |
| "loss": 0.7376, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.2069935691318328, |
| "grad_norm": 4.9392891385103495, |
| "learning_rate": 9.658324456752194e-06, |
| "loss": 0.6808, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.2090032154340836, |
| "grad_norm": 5.07290134380499, |
| "learning_rate": 9.645465398872645e-06, |
| "loss": 0.6335, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.2110128617363344, |
| "grad_norm": 6.372463646337311, |
| "learning_rate": 9.632377697411114e-06, |
| "loss": 0.7125, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.21302250803858522, |
| "grad_norm": 4.8784584544005565, |
| "learning_rate": 9.619061996526735e-06, |
| "loss": 0.7647, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.215032154340836, |
| "grad_norm": 4.991918572196447, |
| "learning_rate": 9.605518951600456e-06, |
| "loss": 0.7159, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.2170418006430868, |
| "grad_norm": 4.7469831719019, |
| "learning_rate": 9.591749229202805e-06, |
| "loss": 0.8187, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.21905144694533762, |
| "grad_norm": 5.013038664469992, |
| "learning_rate": 9.577753507061063e-06, |
| "loss": 0.7215, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.22106109324758844, |
| "grad_norm": 4.814679760540896, |
| "learning_rate": 9.563532474025922e-06, |
| "loss": 0.6789, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.22106109324758844, |
| "eval_cooking_sharegpt_test_loss": 0.706028163433075, |
| "eval_cooking_sharegpt_test_runtime": 29.1544, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.86, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.22307073954983922, |
| "grad_norm": 5.071008410167568, |
| "learning_rate": 9.549086830037573e-06, |
| "loss": 0.7722, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.22508038585209003, |
| "grad_norm": 4.819320792169161, |
| "learning_rate": 9.534417286091254e-06, |
| "loss": 0.6459, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.22709003215434084, |
| "grad_norm": 5.805791958928098, |
| "learning_rate": 9.519524564202261e-06, |
| "loss": 0.7018, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.22909967845659163, |
| "grad_norm": 5.261658646649239, |
| "learning_rate": 9.50440939737041e-06, |
| "loss": 0.7692, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.23110932475884244, |
| "grad_norm": 6.656814899492019, |
| "learning_rate": 9.489072529543955e-06, |
| "loss": 0.8188, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.23311897106109325, |
| "grad_norm": 5.482276123459045, |
| "learning_rate": 9.473514715582982e-06, |
| "loss": 0.727, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.23512861736334406, |
| "grad_norm": 5.089480873020227, |
| "learning_rate": 9.457736721222245e-06, |
| "loss": 0.7129, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.23713826366559485, |
| "grad_norm": 4.422268494779517, |
| "learning_rate": 9.441739323033485e-06, |
| "loss": 0.6732, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.23914790996784566, |
| "grad_norm": 5.561830088062397, |
| "learning_rate": 9.425523308387203e-06, |
| "loss": 0.625, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.24115755627009647, |
| "grad_norm": 5.241025479724879, |
| "learning_rate": 9.409089475413912e-06, |
| "loss": 0.743, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.24115755627009647, |
| "eval_cooking_sharegpt_test_loss": 0.7122946381568909, |
| "eval_cooking_sharegpt_test_runtime": 29.1346, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.865, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.24316720257234728, |
| "grad_norm": 5.9963960390112945, |
| "learning_rate": 9.392438632964847e-06, |
| "loss": 0.7522, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.24517684887459806, |
| "grad_norm": 5.407942176340359, |
| "learning_rate": 9.375571600572165e-06, |
| "loss": 0.7116, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.24718649517684887, |
| "grad_norm": 5.318941810734261, |
| "learning_rate": 9.358489208408594e-06, |
| "loss": 0.7307, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.2491961414790997, |
| "grad_norm": 4.610264529077515, |
| "learning_rate": 9.341192297246588e-06, |
| "loss": 0.7274, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2512057877813505, |
| "grad_norm": 4.432769811479363, |
| "learning_rate": 9.323681718416937e-06, |
| "loss": 0.6281, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.2532154340836013, |
| "grad_norm": 4.6735771702678965, |
| "learning_rate": 9.305958333766867e-06, |
| "loss": 0.6655, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.25522508038585207, |
| "grad_norm": 6.091308209571449, |
| "learning_rate": 9.288023015617618e-06, |
| "loss": 0.7275, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.2572347266881029, |
| "grad_norm": 4.6463773174566, |
| "learning_rate": 9.269876646721519e-06, |
| "loss": 0.6827, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.2592443729903537, |
| "grad_norm": 4.452810636438574, |
| "learning_rate": 9.251520120218528e-06, |
| "loss": 0.6883, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.2612540192926045, |
| "grad_norm": 6.398787653321348, |
| "learning_rate": 9.232954339592285e-06, |
| "loss": 0.7807, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.2612540192926045, |
| "eval_cooking_sharegpt_test_loss": 0.6995799541473389, |
| "eval_cooking_sharegpt_test_runtime": 29.1239, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.867, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.2632636655948553, |
| "grad_norm": 5.489052927932185, |
| "learning_rate": 9.214180218625632e-06, |
| "loss": 0.6858, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.2652733118971061, |
| "grad_norm": 5.330659230842556, |
| "learning_rate": 9.195198681355647e-06, |
| "loss": 0.711, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.26728295819935693, |
| "grad_norm": 5.581942505955761, |
| "learning_rate": 9.176010662028157e-06, |
| "loss": 0.6628, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.2692926045016077, |
| "grad_norm": 5.131847371991602, |
| "learning_rate": 9.156617105051763e-06, |
| "loss": 0.679, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.2713022508038585, |
| "grad_norm": 4.7770105092526896, |
| "learning_rate": 9.13701896495135e-06, |
| "loss": 0.7146, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.2733118971061093, |
| "grad_norm": 4.982534700495619, |
| "learning_rate": 9.117217206321113e-06, |
| "loss": 0.7721, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.2753215434083601, |
| "grad_norm": 4.798649936226053, |
| "learning_rate": 9.09721280377708e-06, |
| "loss": 0.7748, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.27733118971061094, |
| "grad_norm": 4.601964700694306, |
| "learning_rate": 9.077006741909133e-06, |
| "loss": 0.7435, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.27934083601286175, |
| "grad_norm": 5.566494391677513, |
| "learning_rate": 9.056600015232567e-06, |
| "loss": 0.6952, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.28135048231511256, |
| "grad_norm": 5.727808059117751, |
| "learning_rate": 9.035993628139117e-06, |
| "loss": 0.6711, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.28135048231511256, |
| "eval_cooking_sharegpt_test_loss": 0.6903340220451355, |
| "eval_cooking_sharegpt_test_runtime": 29.1049, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.872, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.344, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.28336012861736337, |
| "grad_norm": 3.8470896481653494, |
| "learning_rate": 9.01518859484755e-06, |
| "loss": 0.6729, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.2853697749196141, |
| "grad_norm": 4.87977503518934, |
| "learning_rate": 8.99418593935372e-06, |
| "loss": 0.6863, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.28737942122186494, |
| "grad_norm": 4.674016276471173, |
| "learning_rate": 8.972986695380189e-06, |
| "loss": 0.6651, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.28938906752411575, |
| "grad_norm": 5.15746212298852, |
| "learning_rate": 8.95159190632534e-06, |
| "loss": 0.6642, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.29139871382636656, |
| "grad_norm": 5.180219693756491, |
| "learning_rate": 8.930002625212018e-06, |
| "loss": 0.6115, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.2934083601286174, |
| "grad_norm": 4.6310292453141315, |
| "learning_rate": 8.908219914635711e-06, |
| "loss": 0.7092, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.2954180064308682, |
| "grad_norm": 5.683343490286694, |
| "learning_rate": 8.886244846712245e-06, |
| "loss": 0.7257, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.297427652733119, |
| "grad_norm": 4.9104420352970015, |
| "learning_rate": 8.864078503025017e-06, |
| "loss": 0.7523, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.29943729903536975, |
| "grad_norm": 4.821845146152399, |
| "learning_rate": 8.841721974571758e-06, |
| "loss": 0.6734, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.30144694533762056, |
| "grad_norm": 4.700309572076156, |
| "learning_rate": 8.819176361710842e-06, |
| "loss": 0.6201, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.30144694533762056, |
| "eval_cooking_sharegpt_test_loss": 0.6827989816665649, |
| "eval_cooking_sharegpt_test_runtime": 29.1653, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.857, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.3034565916398714, |
| "grad_norm": 4.984810224063278, |
| "learning_rate": 8.796442774107123e-06, |
| "loss": 0.7233, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.3054662379421222, |
| "grad_norm": 4.572885347797854, |
| "learning_rate": 8.77352233067732e-06, |
| "loss": 0.7791, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.307475884244373, |
| "grad_norm": 4.723286735310018, |
| "learning_rate": 8.750416159534944e-06, |
| "loss": 0.692, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.3094855305466238, |
| "grad_norm": 5.060218829161682, |
| "learning_rate": 8.727125397934777e-06, |
| "loss": 0.6615, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.3114951768488746, |
| "grad_norm": 4.833251557506912, |
| "learning_rate": 8.703651192216896e-06, |
| "loss": 0.7046, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.3135048231511254, |
| "grad_norm": 5.151428527882602, |
| "learning_rate": 8.67999469775025e-06, |
| "loss": 0.6859, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.3155144694533762, |
| "grad_norm": 5.403069037330436, |
| "learning_rate": 8.656157078875794e-06, |
| "loss": 0.6585, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.317524115755627, |
| "grad_norm": 4.594702329252282, |
| "learning_rate": 8.632139508849192e-06, |
| "loss": 0.6662, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.3195337620578778, |
| "grad_norm": 3.82227587899388, |
| "learning_rate": 8.60794316978305e-06, |
| "loss": 0.6479, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.3215434083601286, |
| "grad_norm": 4.050592455983538, |
| "learning_rate": 8.583569252588761e-06, |
| "loss": 0.6634, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.3215434083601286, |
| "eval_cooking_sharegpt_test_loss": 0.6700084209442139, |
| "eval_cooking_sharegpt_test_runtime": 29.1558, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.86, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.32355305466237944, |
| "grad_norm": 4.7546247450248895, |
| "learning_rate": 8.559018956917864e-06, |
| "loss": 0.6893, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.32556270096463025, |
| "grad_norm": 4.892180049499131, |
| "learning_rate": 8.534293491103014e-06, |
| "loss": 0.7171, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.327572347266881, |
| "grad_norm": 4.593573940130922, |
| "learning_rate": 8.50939407209851e-06, |
| "loss": 0.602, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.3295819935691318, |
| "grad_norm": 4.812442397504877, |
| "learning_rate": 8.484321925420383e-06, |
| "loss": 0.6965, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.3315916398713826, |
| "grad_norm": 4.995658301324656, |
| "learning_rate": 8.459078285086103e-06, |
| "loss": 0.6757, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.33360128617363344, |
| "grad_norm": 4.411896351243907, |
| "learning_rate": 8.433664393553815e-06, |
| "loss": 0.6125, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.33561093247588425, |
| "grad_norm": 4.037149487682339, |
| "learning_rate": 8.40808150166121e-06, |
| "loss": 0.5841, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.33762057877813506, |
| "grad_norm": 4.6302565097915656, |
| "learning_rate": 8.382330868563943e-06, |
| "loss": 0.669, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.3396302250803859, |
| "grad_norm": 5.037886894281096, |
| "learning_rate": 8.35641376167367e-06, |
| "loss": 0.6602, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.34163987138263663, |
| "grad_norm": 4.116477405728893, |
| "learning_rate": 8.330331456595663e-06, |
| "loss": 0.6318, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.34163987138263663, |
| "eval_cooking_sharegpt_test_loss": 0.6634958386421204, |
| "eval_cooking_sharegpt_test_runtime": 29.1155, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.869, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.34364951768488744, |
| "grad_norm": 4.53205416614136, |
| "learning_rate": 8.304085237066027e-06, |
| "loss": 0.7296, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.34565916398713825, |
| "grad_norm": 4.562829710605114, |
| "learning_rate": 8.277676394888518e-06, |
| "loss": 0.6152, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.34766881028938906, |
| "grad_norm": 4.850911945798656, |
| "learning_rate": 8.25110622987096e-06, |
| "loss": 0.6514, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.3496784565916399, |
| "grad_norm": 4.655697540410078, |
| "learning_rate": 8.22437604976127e-06, |
| "loss": 0.7196, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.3516881028938907, |
| "grad_norm": 5.125990956860812, |
| "learning_rate": 8.197487170183092e-06, |
| "loss": 0.6654, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.3536977491961415, |
| "grad_norm": 4.758879553215313, |
| "learning_rate": 8.170440914571052e-06, |
| "loss": 0.6771, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.3557073954983923, |
| "grad_norm": 4.800616804892499, |
| "learning_rate": 8.143238614105608e-06, |
| "loss": 0.6825, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.35771704180064307, |
| "grad_norm": 5.081512148323733, |
| "learning_rate": 8.115881607647538e-06, |
| "loss": 0.6968, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.3597266881028939, |
| "grad_norm": 4.824963989656846, |
| "learning_rate": 8.08837124167204e-06, |
| "loss": 0.6879, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.3617363344051447, |
| "grad_norm": 5.409545786784468, |
| "learning_rate": 8.060708870202462e-06, |
| "loss": 0.7033, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.3617363344051447, |
| "eval_cooking_sharegpt_test_loss": 0.6626113653182983, |
| "eval_cooking_sharegpt_test_runtime": 29.1696, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.856, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.3637459807073955, |
| "grad_norm": 3.7618538333099982, |
| "learning_rate": 8.032895854743661e-06, |
| "loss": 0.6522, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.3657556270096463, |
| "grad_norm": 4.543338033090877, |
| "learning_rate": 8.004933564214991e-06, |
| "loss": 0.589, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.3677652733118971, |
| "grad_norm": 4.486029590225905, |
| "learning_rate": 7.976823374882919e-06, |
| "loss": 0.6684, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.36977491961414793, |
| "grad_norm": 4.184644901714797, |
| "learning_rate": 7.948566670293298e-06, |
| "loss": 0.6203, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.3717845659163987, |
| "grad_norm": 4.232384871040644, |
| "learning_rate": 7.920164841203262e-06, |
| "loss": 0.6393, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.3737942122186495, |
| "grad_norm": 4.593579658625747, |
| "learning_rate": 7.891619285512781e-06, |
| "loss": 0.7574, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.3758038585209003, |
| "grad_norm": 4.23534793280711, |
| "learning_rate": 7.862931408195855e-06, |
| "loss": 0.5811, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.3778135048231511, |
| "grad_norm": 4.805442331593937, |
| "learning_rate": 7.834102621231364e-06, |
| "loss": 0.6265, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.37982315112540194, |
| "grad_norm": 4.667120661635427, |
| "learning_rate": 7.805134343533572e-06, |
| "loss": 0.6295, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.38183279742765275, |
| "grad_norm": 4.3862142407448905, |
| "learning_rate": 7.776028000882288e-06, |
| "loss": 0.6715, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.38183279742765275, |
| "eval_cooking_sharegpt_test_loss": 0.6476317048072815, |
| "eval_cooking_sharegpt_test_runtime": 29.1486, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.861, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.38384244372990356, |
| "grad_norm": 4.516234494825295, |
| "learning_rate": 7.746785025852695e-06, |
| "loss": 0.6513, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.3858520900321543, |
| "grad_norm": 4.063375240398369, |
| "learning_rate": 7.717406857744837e-06, |
| "loss": 0.5945, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.3878617363344051, |
| "grad_norm": 4.280617916285887, |
| "learning_rate": 7.687894942512786e-06, |
| "loss": 0.6263, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.38987138263665594, |
| "grad_norm": 4.2306442061102585, |
| "learning_rate": 7.65825073269346e-06, |
| "loss": 0.6307, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.39188102893890675, |
| "grad_norm": 3.83492814563052, |
| "learning_rate": 7.628475687335142e-06, |
| "loss": 0.6768, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.39389067524115756, |
| "grad_norm": 3.91144567078091, |
| "learning_rate": 7.598571271925667e-06, |
| "loss": 0.5288, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.3959003215434084, |
| "grad_norm": 4.8926561756291145, |
| "learning_rate": 7.568538958320291e-06, |
| "loss": 0.5691, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.3979099678456592, |
| "grad_norm": 5.343345050879741, |
| "learning_rate": 7.538380224669244e-06, |
| "loss": 0.681, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.39991961414790994, |
| "grad_norm": 4.801310495851274, |
| "learning_rate": 7.5080965553449834e-06, |
| "loss": 0.6365, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.40192926045016075, |
| "grad_norm": 4.263810617817441, |
| "learning_rate": 7.477689440869135e-06, |
| "loss": 0.6511, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.40192926045016075, |
| "eval_cooking_sharegpt_test_loss": 0.640380859375, |
| "eval_cooking_sharegpt_test_runtime": 29.1538, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.86, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.40393890675241156, |
| "grad_norm": 4.608176994643662, |
| "learning_rate": 7.447160377839125e-06, |
| "loss": 0.6558, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.4059485530546624, |
| "grad_norm": 4.184997495410128, |
| "learning_rate": 7.416510868854529e-06, |
| "loss": 0.6028, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.4079581993569132, |
| "grad_norm": 4.14811982617309, |
| "learning_rate": 7.385742422443108e-06, |
| "loss": 0.6116, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.409967845659164, |
| "grad_norm": 4.4179772033730975, |
| "learning_rate": 7.354856552986563e-06, |
| "loss": 0.6657, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.4119774919614148, |
| "grad_norm": 3.872451257996019, |
| "learning_rate": 7.323854780646002e-06, |
| "loss": 0.616, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.4139871382636656, |
| "grad_norm": 4.575816735109395, |
| "learning_rate": 7.2927386312871185e-06, |
| "loss": 0.6595, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.4159967845659164, |
| "grad_norm": 4.636123500937769, |
| "learning_rate": 7.261509636405087e-06, |
| "loss": 0.537, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.4180064308681672, |
| "grad_norm": 5.110397360196885, |
| "learning_rate": 7.230169333049188e-06, |
| "loss": 0.6751, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.420016077170418, |
| "grad_norm": 4.095989085644148, |
| "learning_rate": 7.198719263747158e-06, |
| "loss": 0.6638, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.4220257234726688, |
| "grad_norm": 5.40481290202407, |
| "learning_rate": 7.167160976429264e-06, |
| "loss": 0.6804, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.4220257234726688, |
| "eval_cooking_sharegpt_test_loss": 0.6353456974029541, |
| "eval_cooking_sharegpt_test_runtime": 29.1174, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.869, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.4240353697749196, |
| "grad_norm": 4.345481799467673, |
| "learning_rate": 7.13549602435212e-06, |
| "loss": 0.6407, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.42604501607717044, |
| "grad_norm": 4.1689356990951145, |
| "learning_rate": 7.103725966022233e-06, |
| "loss": 0.6676, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.42805466237942125, |
| "grad_norm": 4.124587131944435, |
| "learning_rate": 7.071852365119306e-06, |
| "loss": 0.5613, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.430064308681672, |
| "grad_norm": 5.215102388713326, |
| "learning_rate": 7.039876790419262e-06, |
| "loss": 0.6349, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.4320739549839228, |
| "grad_norm": 4.222772794043426, |
| "learning_rate": 7.0078008157170415e-06, |
| "loss": 0.5982, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.4340836012861736, |
| "grad_norm": 4.044144044404326, |
| "learning_rate": 6.975626019749137e-06, |
| "loss": 0.6009, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.43609324758842444, |
| "grad_norm": 4.504810284400397, |
| "learning_rate": 6.943353986115893e-06, |
| "loss": 0.6371, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.43810289389067525, |
| "grad_norm": 4.200082136051, |
| "learning_rate": 6.910986303203556e-06, |
| "loss": 0.6367, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.44011254019292606, |
| "grad_norm": 4.474891462331243, |
| "learning_rate": 6.87852456410611e-06, |
| "loss": 0.6916, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.44212218649517687, |
| "grad_norm": 5.613000149936893, |
| "learning_rate": 6.845970366546856e-06, |
| "loss": 0.6355, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.44212218649517687, |
| "eval_cooking_sharegpt_test_loss": 0.6258378028869629, |
| "eval_cooking_sharegpt_test_runtime": 29.1265, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.867, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.44413183279742763, |
| "grad_norm": 5.123887472129782, |
| "learning_rate": 6.813325312799769e-06, |
| "loss": 0.6296, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.44614147909967844, |
| "grad_norm": 4.4261628776726605, |
| "learning_rate": 6.7805910096106555e-06, |
| "loss": 0.5624, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.44815112540192925, |
| "grad_norm": 4.638630416730711, |
| "learning_rate": 6.747769068118049e-06, |
| "loss": 0.6354, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.45016077170418006, |
| "grad_norm": 4.461694988531885, |
| "learning_rate": 6.714861103773934e-06, |
| "loss": 0.5248, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.4521704180064309, |
| "grad_norm": 5.163765933017904, |
| "learning_rate": 6.681868736264215e-06, |
| "loss": 0.6462, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.4541800643086817, |
| "grad_norm": 4.6310268658457545, |
| "learning_rate": 6.648793589429011e-06, |
| "loss": 0.6174, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.4561897106109325, |
| "grad_norm": 5.126634815687219, |
| "learning_rate": 6.61563729118273e-06, |
| "loss": 0.6466, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.45819935691318325, |
| "grad_norm": 4.319380604514048, |
| "learning_rate": 6.582401473433941e-06, |
| "loss": 0.654, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.46020900321543406, |
| "grad_norm": 4.247314315589029, |
| "learning_rate": 6.5490877720050574e-06, |
| "loss": 0.5634, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.4622186495176849, |
| "grad_norm": 4.906257391887683, |
| "learning_rate": 6.515697826551822e-06, |
| "loss": 0.692, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.4622186495176849, |
| "eval_cooking_sharegpt_test_loss": 0.6236215829849243, |
| "eval_cooking_sharegpt_test_runtime": 29.1167, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.869, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.4642282958199357, |
| "grad_norm": 4.819507990509822, |
| "learning_rate": 6.482233280482608e-06, |
| "loss": 0.6147, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.4662379421221865, |
| "grad_norm": 4.478005354409713, |
| "learning_rate": 6.448695780877532e-06, |
| "loss": 0.6581, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.4682475884244373, |
| "grad_norm": 4.193130144327688, |
| "learning_rate": 6.415086978407382e-06, |
| "loss": 0.6124, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.4702572347266881, |
| "grad_norm": 4.1578998395779365, |
| "learning_rate": 6.381408527252381e-06, |
| "loss": 0.6238, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.47226688102893893, |
| "grad_norm": 3.879162148025109, |
| "learning_rate": 6.347662085020764e-06, |
| "loss": 0.5786, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.4742765273311897, |
| "grad_norm": 4.9955060839894765, |
| "learning_rate": 6.313849312667197e-06, |
| "loss": 0.6763, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.4762861736334405, |
| "grad_norm": 4.446633316148537, |
| "learning_rate": 6.279971874411027e-06, |
| "loss": 0.6339, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.4782958199356913, |
| "grad_norm": 4.288694053242222, |
| "learning_rate": 6.246031437654368e-06, |
| "loss": 0.616, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.4803054662379421, |
| "grad_norm": 3.8400933875164087, |
| "learning_rate": 6.2120296729000395e-06, |
| "loss": 0.6927, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.48231511254019294, |
| "grad_norm": 5.8265497485402955, |
| "learning_rate": 6.177968253669337e-06, |
| "loss": 0.7054, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.48231511254019294, |
| "eval_cooking_sharegpt_test_loss": 0.6158734560012817, |
| "eval_cooking_sharegpt_test_runtime": 29.1595, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.859, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.48432475884244375, |
| "grad_norm": 4.586416002456061, |
| "learning_rate": 6.143848856419675e-06, |
| "loss": 0.6032, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.48633440514469456, |
| "grad_norm": 4.9377135174761895, |
| "learning_rate": 6.109673160462063e-06, |
| "loss": 0.6026, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.4883440514469453, |
| "grad_norm": 5.323719984452999, |
| "learning_rate": 6.075442847878463e-06, |
| "loss": 0.671, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.4903536977491961, |
| "grad_norm": 4.045561951621483, |
| "learning_rate": 6.041159603438991e-06, |
| "loss": 0.5717, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.49236334405144694, |
| "grad_norm": 3.7625870662514562, |
| "learning_rate": 6.006825114518998e-06, |
| "loss": 0.5493, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.49437299035369775, |
| "grad_norm": 4.58456420076278, |
| "learning_rate": 5.9724410710160184e-06, |
| "loss": 0.5905, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.49638263665594856, |
| "grad_norm": 4.536041072205623, |
| "learning_rate": 5.938009165266603e-06, |
| "loss": 0.6284, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.4983922829581994, |
| "grad_norm": 4.028915918788256, |
| "learning_rate": 5.903531091963011e-06, |
| "loss": 0.5853, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.5004019292604501, |
| "grad_norm": 4.599870175094803, |
| "learning_rate": 5.8690085480698075e-06, |
| "loss": 0.5881, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.502411575562701, |
| "grad_norm": 4.439772860645252, |
| "learning_rate": 5.834443232740346e-06, |
| "loss": 0.6095, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.502411575562701, |
| "eval_cooking_sharegpt_test_loss": 0.6074568033218384, |
| "eval_cooking_sharegpt_test_runtime": 29.1196, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.868, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.5044212218649518, |
| "grad_norm": 4.467706371524093, |
| "learning_rate": 5.799836847233129e-06, |
| "loss": 0.6264, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.5064308681672026, |
| "grad_norm": 4.101563851958729, |
| "learning_rate": 5.765191094828078e-06, |
| "loss": 0.555, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.5084405144694534, |
| "grad_norm": 4.466205286827134, |
| "learning_rate": 5.7305076807426975e-06, |
| "loss": 0.5756, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.5104501607717041, |
| "grad_norm": 4.701080377162357, |
| "learning_rate": 5.695788312048159e-06, |
| "loss": 0.6317, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.512459807073955, |
| "grad_norm": 4.433691534657307, |
| "learning_rate": 5.66103469758526e-06, |
| "loss": 0.6215, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.5144694533762058, |
| "grad_norm": 4.755580900859219, |
| "learning_rate": 5.626248547880337e-06, |
| "loss": 0.5824, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.5164790996784566, |
| "grad_norm": 3.679078766710477, |
| "learning_rate": 5.591431575061064e-06, |
| "loss": 0.5474, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.5184887459807074, |
| "grad_norm": 4.484327374671317, |
| "learning_rate": 5.55658549277219e-06, |
| "loss": 0.649, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.5204983922829582, |
| "grad_norm": 4.505852120752897, |
| "learning_rate": 5.5217120160911886e-06, |
| "loss": 0.6159, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.522508038585209, |
| "grad_norm": 5.01215860870057, |
| "learning_rate": 5.486812861443852e-06, |
| "loss": 0.6294, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.522508038585209, |
| "eval_cooking_sharegpt_test_loss": 0.595450758934021, |
| "eval_cooking_sharegpt_test_runtime": 29.1321, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.865, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.5245176848874598, |
| "grad_norm": 4.622631736422735, |
| "learning_rate": 5.45188974651981e-06, |
| "loss": 0.6114, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.5265273311897106, |
| "grad_norm": 4.518942479671849, |
| "learning_rate": 5.416944390187977e-06, |
| "loss": 0.6818, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.5285369774919614, |
| "grad_norm": 4.341948028095467, |
| "learning_rate": 5.381978512411968e-06, |
| "loss": 0.5809, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.5305466237942122, |
| "grad_norm": 3.88446157180582, |
| "learning_rate": 5.346993834165431e-06, |
| "loss": 0.5869, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.532556270096463, |
| "grad_norm": 4.430345179234876, |
| "learning_rate": 5.311992077347351e-06, |
| "loss": 0.6948, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.5345659163987139, |
| "grad_norm": 4.164240920296163, |
| "learning_rate": 5.2769749646972935e-06, |
| "loss": 0.5607, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.5365755627009646, |
| "grad_norm": 4.2836756208794515, |
| "learning_rate": 5.241944219710624e-06, |
| "loss": 0.6401, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.5385852090032154, |
| "grad_norm": 4.126109249564745, |
| "learning_rate": 5.206901566553665e-06, |
| "loss": 0.5776, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.5405948553054662, |
| "grad_norm": 4.661804105805685, |
| "learning_rate": 5.171848729978851e-06, |
| "loss": 0.6129, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.542604501607717, |
| "grad_norm": 4.448287794131716, |
| "learning_rate": 5.136787435239825e-06, |
| "loss": 0.615, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.542604501607717, |
| "eval_cooking_sharegpt_test_loss": 0.5893608331680298, |
| "eval_cooking_sharegpt_test_runtime": 29.1232, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.867, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.5446141479099679, |
| "grad_norm": 3.831547012944043, |
| "learning_rate": 5.101719408006534e-06, |
| "loss": 0.5785, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.5466237942122186, |
| "grad_norm": 4.528939245986926, |
| "learning_rate": 5.0666463742802855e-06, |
| "loss": 0.6062, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.5486334405144695, |
| "grad_norm": 4.507770528828299, |
| "learning_rate": 5.031570060308799e-06, |
| "loss": 0.5992, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.5506430868167203, |
| "grad_norm": 3.890107991234021, |
| "learning_rate": 4.996492192501251e-06, |
| "loss": 0.5942, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.552652733118971, |
| "grad_norm": 4.065283624907987, |
| "learning_rate": 4.9614144973432855e-06, |
| "loss": 0.5971, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.5546623794212219, |
| "grad_norm": 4.937482234167603, |
| "learning_rate": 4.926338701312059e-06, |
| "loss": 0.6404, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.5566720257234726, |
| "grad_norm": 3.9548565502184325, |
| "learning_rate": 4.8912665307912435e-06, |
| "loss": 0.5026, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.5586816720257235, |
| "grad_norm": 4.423584782540341, |
| "learning_rate": 4.856199711986082e-06, |
| "loss": 0.6386, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.5606913183279743, |
| "grad_norm": 4.303957079156144, |
| "learning_rate": 4.8211399708384e-06, |
| "loss": 0.581, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.5627009646302251, |
| "grad_norm": 4.666098899764007, |
| "learning_rate": 4.786089032941683e-06, |
| "loss": 0.6602, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.5627009646302251, |
| "eval_cooking_sharegpt_test_loss": 0.5806075930595398, |
| "eval_cooking_sharegpt_test_runtime": 29.1487, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.861, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.5647106109324759, |
| "grad_norm": 4.509076285193486, |
| "learning_rate": 4.75104862345612e-06, |
| "loss": 0.738, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.5667202572347267, |
| "grad_norm": 4.238722358099732, |
| "learning_rate": 4.716020467023716e-06, |
| "loss": 0.564, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.5687299035369775, |
| "grad_norm": 3.7723995730660347, |
| "learning_rate": 4.68100628768339e-06, |
| "loss": 0.584, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.5707395498392283, |
| "grad_norm": 4.403249450502511, |
| "learning_rate": 4.646007808786132e-06, |
| "loss": 0.5753, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.5727491961414791, |
| "grad_norm": 3.4896736313458927, |
| "learning_rate": 4.611026752910172e-06, |
| "loss": 0.4941, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.5747588424437299, |
| "grad_norm": 4.782921151183437, |
| "learning_rate": 4.576064841776207e-06, |
| "loss": 0.5882, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.5767684887459807, |
| "grad_norm": 4.523388455740361, |
| "learning_rate": 4.541123796162656e-06, |
| "loss": 0.6504, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.5787781350482315, |
| "grad_norm": 4.846919506027111, |
| "learning_rate": 4.506205335820959e-06, |
| "loss": 0.6503, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.5807877813504824, |
| "grad_norm": 4.353820829633878, |
| "learning_rate": 4.471311179390946e-06, |
| "loss": 0.5788, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.5827974276527331, |
| "grad_norm": 4.515430059421715, |
| "learning_rate": 4.436443044316236e-06, |
| "loss": 0.6004, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.5827974276527331, |
| "eval_cooking_sharegpt_test_loss": 0.5744032263755798, |
| "eval_cooking_sharegpt_test_runtime": 29.1343, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.865, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.5848070739549839, |
| "grad_norm": 3.80274840853952, |
| "learning_rate": 4.401602646759717e-06, |
| "loss": 0.5645, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.5868167202572347, |
| "grad_norm": 3.626670598918427, |
| "learning_rate": 4.366791701519065e-06, |
| "loss": 0.5602, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.5888263665594855, |
| "grad_norm": 3.9743598170373886, |
| "learning_rate": 4.332011921942365e-06, |
| "loss": 0.5964, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.5908360128617364, |
| "grad_norm": 3.83250555228501, |
| "learning_rate": 4.297265019843755e-06, |
| "loss": 0.5535, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.5928456591639871, |
| "grad_norm": 3.6139874244672194, |
| "learning_rate": 4.262552705419203e-06, |
| "loss": 0.5168, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.594855305466238, |
| "grad_norm": 4.740911098327277, |
| "learning_rate": 4.227876687162303e-06, |
| "loss": 0.525, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.5968649517684887, |
| "grad_norm": 3.7870286568972276, |
| "learning_rate": 4.193238671780212e-06, |
| "loss": 0.5515, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.5988745980707395, |
| "grad_norm": 5.1311308301838086, |
| "learning_rate": 4.15864036410963e-06, |
| "loss": 0.6117, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.6008842443729904, |
| "grad_norm": 4.4089674794586236, |
| "learning_rate": 4.124083467032902e-06, |
| "loss": 0.5846, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.6028938906752411, |
| "grad_norm": 4.051889135358413, |
| "learning_rate": 4.08956968139419e-06, |
| "loss": 0.5855, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.6028938906752411, |
| "eval_cooking_sharegpt_test_loss": 0.5694165229797363, |
| "eval_cooking_sharegpt_test_runtime": 29.1123, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.87, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.604903536977492, |
| "grad_norm": 4.1318438937071, |
| "learning_rate": 4.05510070591578e-06, |
| "loss": 0.6138, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.6069131832797428, |
| "grad_norm": 3.601915970098065, |
| "learning_rate": 4.020678237114451e-06, |
| "loss": 0.5932, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.6089228295819936, |
| "grad_norm": 3.9179716478128372, |
| "learning_rate": 3.986303969217996e-06, |
| "loss": 0.5754, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.6109324758842444, |
| "grad_norm": 4.231240281833697, |
| "learning_rate": 3.951979594081818e-06, |
| "loss": 0.5833, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.6129421221864951, |
| "grad_norm": 3.6790737814106462, |
| "learning_rate": 3.917706801105663e-06, |
| "loss": 0.5875, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.614951768488746, |
| "grad_norm": 3.83516561393742, |
| "learning_rate": 3.883487277150481e-06, |
| "loss": 0.5629, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.6169614147909968, |
| "grad_norm": 4.844335590656684, |
| "learning_rate": 3.849322706455379e-06, |
| "loss": 0.5862, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.6189710610932476, |
| "grad_norm": 3.9186087206693996, |
| "learning_rate": 3.815214770554755e-06, |
| "loss": 0.5158, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.6209807073954984, |
| "grad_norm": 3.9221369945811198, |
| "learning_rate": 3.781165148195501e-06, |
| "loss": 0.5216, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.6229903536977492, |
| "grad_norm": 3.3756618558855633, |
| "learning_rate": 3.74717551525441e-06, |
| "loss": 0.5138, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.6229903536977492, |
| "eval_cooking_sharegpt_test_loss": 0.5629362463951111, |
| "eval_cooking_sharegpt_test_runtime": 29.1527, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.86, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 4.312505621340993, |
| "learning_rate": 3.713247544655663e-06, |
| "loss": 0.5655, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.6270096463022508, |
| "grad_norm": 4.442878870711612, |
| "learning_rate": 3.6793829062885133e-06, |
| "loss": 0.5324, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.6290192926045016, |
| "grad_norm": 4.1821188904384785, |
| "learning_rate": 3.6455832669250798e-06, |
| "loss": 0.5367, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.6310289389067524, |
| "grad_norm": 4.16995310964458, |
| "learning_rate": 3.611850290138322e-06, |
| "loss": 0.5449, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.6330385852090032, |
| "grad_norm": 4.103557618161658, |
| "learning_rate": 3.578185636220154e-06, |
| "loss": 0.547, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.635048231511254, |
| "grad_norm": 4.221137750943906, |
| "learning_rate": 3.5445909620997317e-06, |
| "loss": 0.6128, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.6370578778135049, |
| "grad_norm": 4.685243877985315, |
| "learning_rate": 3.511067921261897e-06, |
| "loss": 0.5288, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.6390675241157556, |
| "grad_norm": 3.7485957955570526, |
| "learning_rate": 3.4776181636658004e-06, |
| "loss": 0.5361, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.6410771704180064, |
| "grad_norm": 5.13753736382163, |
| "learning_rate": 3.444243335663685e-06, |
| "loss": 0.6099, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.6430868167202572, |
| "grad_norm": 4.20430212422138, |
| "learning_rate": 3.4109450799198667e-06, |
| "loss": 0.5544, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.6430868167202572, |
| "eval_cooking_sharegpt_test_loss": 0.5597677826881409, |
| "eval_cooking_sharegpt_test_runtime": 29.1383, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.864, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.645096463022508, |
| "grad_norm": 4.178198769845903, |
| "learning_rate": 3.3777250353298725e-06, |
| "loss": 0.5958, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.6471061093247589, |
| "grad_norm": 4.042255503484116, |
| "learning_rate": 3.344584836939777e-06, |
| "loss": 0.596, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.6491157556270096, |
| "grad_norm": 4.187189657432103, |
| "learning_rate": 3.3115261158657443e-06, |
| "loss": 0.5823, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.6511254019292605, |
| "grad_norm": 4.893019445921785, |
| "learning_rate": 3.2785504992137208e-06, |
| "loss": 0.5981, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.6531350482315113, |
| "grad_norm": 4.211816172053425, |
| "learning_rate": 3.2456596099993744e-06, |
| "loss": 0.6481, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.655144694533762, |
| "grad_norm": 4.1700590638139925, |
| "learning_rate": 3.2128550670681946e-06, |
| "loss": 0.5761, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.6571543408360129, |
| "grad_norm": 4.161630224139953, |
| "learning_rate": 3.18013848501583e-06, |
| "loss": 0.5866, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.6591639871382636, |
| "grad_norm": 3.557727720022592, |
| "learning_rate": 3.1475114741086064e-06, |
| "loss": 0.4835, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.6611736334405145, |
| "grad_norm": 4.852478551572312, |
| "learning_rate": 3.114975640204282e-06, |
| "loss": 0.5574, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.6631832797427653, |
| "grad_norm": 3.859784406459026, |
| "learning_rate": 3.0825325846730013e-06, |
| "loss": 0.5624, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.6631832797427653, |
| "eval_cooking_sharegpt_test_loss": 0.5586913228034973, |
| "eval_cooking_sharegpt_test_runtime": 29.1426, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.863, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.6651929260450161, |
| "grad_norm": 4.482495052564249, |
| "learning_rate": 3.0501839043184858e-06, |
| "loss": 0.5688, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.6672025723472669, |
| "grad_norm": 4.042365197538812, |
| "learning_rate": 3.017931191299433e-06, |
| "loss": 0.5349, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.6692122186495176, |
| "grad_norm": 4.165393275797863, |
| "learning_rate": 2.985776033051161e-06, |
| "loss": 0.5798, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.6712218649517685, |
| "grad_norm": 4.037795568611387, |
| "learning_rate": 2.9537200122074684e-06, |
| "loss": 0.5308, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.6732315112540193, |
| "grad_norm": 3.870075592190109, |
| "learning_rate": 2.9217647065227474e-06, |
| "loss": 0.5248, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.6752411575562701, |
| "grad_norm": 2.871993790875757, |
| "learning_rate": 2.889911688794322e-06, |
| "loss": 0.5273, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.6772508038585209, |
| "grad_norm": 4.430515877745258, |
| "learning_rate": 2.858162526785046e-06, |
| "loss": 0.5656, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.6792604501607717, |
| "grad_norm": 5.1614822907000395, |
| "learning_rate": 2.8265187831461234e-06, |
| "loss": 0.5579, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.6812700964630225, |
| "grad_norm": 4.630422383447896, |
| "learning_rate": 2.7949820153402163e-06, |
| "loss": 0.6282, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.6832797427652733, |
| "grad_norm": 3.181023467670562, |
| "learning_rate": 2.763553775564778e-06, |
| "loss": 0.5093, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.6832797427652733, |
| "eval_cooking_sharegpt_test_loss": 0.5516761541366577, |
| "eval_cooking_sharegpt_test_runtime": 29.1563, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.86, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.6852893890675241, |
| "grad_norm": 4.646508693714103, |
| "learning_rate": 2.732235610675652e-06, |
| "loss": 0.6124, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.6872990353697749, |
| "grad_norm": 4.162112396548259, |
| "learning_rate": 2.7010290621109527e-06, |
| "loss": 0.5413, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.6893086816720257, |
| "grad_norm": 3.6513543600819998, |
| "learning_rate": 2.6699356658151766e-06, |
| "loss": 0.535, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.6913183279742765, |
| "grad_norm": 5.33408477701188, |
| "learning_rate": 2.6389569521636325e-06, |
| "loss": 0.6191, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.6933279742765274, |
| "grad_norm": 4.0009358447843315, |
| "learning_rate": 2.6080944458870884e-06, |
| "loss": 0.5353, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.6953376205787781, |
| "grad_norm": 4.288625288455154, |
| "learning_rate": 2.577349665996752e-06, |
| "loss": 0.605, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.697347266881029, |
| "grad_norm": 3.7812731515985427, |
| "learning_rate": 2.5467241257094844e-06, |
| "loss": 0.4522, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.6993569131832797, |
| "grad_norm": 4.4091214999888635, |
| "learning_rate": 2.5162193323733475e-06, |
| "loss": 0.598, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.7013665594855305, |
| "grad_norm": 4.1788240029265, |
| "learning_rate": 2.4858367873933885e-06, |
| "loss": 0.5406, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.7033762057877814, |
| "grad_norm": 3.944073910719997, |
| "learning_rate": 2.455577986157762e-06, |
| "loss": 0.5658, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.7033762057877814, |
| "eval_cooking_sharegpt_test_loss": 0.54704350233078, |
| "eval_cooking_sharegpt_test_runtime": 29.1394, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.864, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.7053858520900321, |
| "grad_norm": 4.0409641002525065, |
| "learning_rate": 2.425444417964112e-06, |
| "loss": 0.5993, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.707395498392283, |
| "grad_norm": 3.830335679595113, |
| "learning_rate": 2.395437565946291e-06, |
| "loss": 0.4863, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.7094051446945338, |
| "grad_norm": 3.9483809252674984, |
| "learning_rate": 2.3655589070013434e-06, |
| "loss": 0.538, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.7114147909967846, |
| "grad_norm": 4.721569878991402, |
| "learning_rate": 2.3358099117168277e-06, |
| "loss": 0.6086, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.7134244372990354, |
| "grad_norm": 3.817648873180748, |
| "learning_rate": 2.3061920442984237e-06, |
| "loss": 0.5537, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.7154340836012861, |
| "grad_norm": 4.952943362320339, |
| "learning_rate": 2.276706762497881e-06, |
| "loss": 0.5734, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.717443729903537, |
| "grad_norm": 4.308685442110032, |
| "learning_rate": 2.247355517541259e-06, |
| "loss": 0.5245, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.7194533762057878, |
| "grad_norm": 3.6876910060455015, |
| "learning_rate": 2.2181397540575012e-06, |
| "loss": 0.4904, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.7214630225080386, |
| "grad_norm": 4.056961760469262, |
| "learning_rate": 2.1890609100073406e-06, |
| "loss": 0.5792, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.7234726688102894, |
| "grad_norm": 5.194938789182878, |
| "learning_rate": 2.1601204166125097e-06, |
| "loss": 0.5797, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.7234726688102894, |
| "eval_cooking_sharegpt_test_loss": 0.5419730544090271, |
| "eval_cooking_sharegpt_test_runtime": 29.1573, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.859, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.7254823151125402, |
| "grad_norm": 4.5439902420076, |
| "learning_rate": 2.131319698285321e-06, |
| "loss": 0.5149, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.727491961414791, |
| "grad_norm": 4.193817296752171, |
| "learning_rate": 2.1026601725585303e-06, |
| "loss": 0.5707, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.7295016077170418, |
| "grad_norm": 4.352857862781889, |
| "learning_rate": 2.0741432500155957e-06, |
| "loss": 0.5501, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.7315112540192926, |
| "grad_norm": 4.693742662043891, |
| "learning_rate": 2.045770334221227e-06, |
| "loss": 0.5476, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.7335209003215434, |
| "grad_norm": 3.811953363220478, |
| "learning_rate": 2.017542821652321e-06, |
| "loss": 0.5512, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.7355305466237942, |
| "grad_norm": 4.0209013372552285, |
| "learning_rate": 1.9894621016292233e-06, |
| "loss": 0.5004, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.737540192926045, |
| "grad_norm": 3.7470655457186286, |
| "learning_rate": 1.9615295562473445e-06, |
| "loss": 0.5138, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.7395498392282959, |
| "grad_norm": 5.0495297609749255, |
| "learning_rate": 1.933746560309137e-06, |
| "loss": 0.5589, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.7415594855305466, |
| "grad_norm": 4.4178604447303895, |
| "learning_rate": 1.906114481256432e-06, |
| "loss": 0.5416, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.7435691318327974, |
| "grad_norm": 3.8034316360410676, |
| "learning_rate": 1.8786346791031356e-06, |
| "loss": 0.5376, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.7435691318327974, |
| "eval_cooking_sharegpt_test_loss": 0.5386413335800171, |
| "eval_cooking_sharegpt_test_runtime": 29.1364, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.864, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.7455787781350482, |
| "grad_norm": 3.885403344737721, |
| "learning_rate": 1.8513085063682828e-06, |
| "loss": 0.5474, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.747588424437299, |
| "grad_norm": 3.5164892334257343, |
| "learning_rate": 1.8241373080094822e-06, |
| "loss": 0.4625, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.7495980707395499, |
| "grad_norm": 3.5334086129543008, |
| "learning_rate": 1.7971224213567017e-06, |
| "loss": 0.4698, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.7516077170418006, |
| "grad_norm": 3.7281802070266736, |
| "learning_rate": 1.77026517604647e-06, |
| "loss": 0.5437, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.7536173633440515, |
| "grad_norm": 6.112903495715869, |
| "learning_rate": 1.7435668939564065e-06, |
| "loss": 0.5897, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.7556270096463023, |
| "grad_norm": 4.903593470757499, |
| "learning_rate": 1.7170288891401836e-06, |
| "loss": 0.543, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.757636655948553, |
| "grad_norm": 4.1471801370352175, |
| "learning_rate": 1.6906524677628345e-06, |
| "loss": 0.5533, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.7596463022508039, |
| "grad_norm": 4.44067669671905, |
| "learning_rate": 1.6644389280364748e-06, |
| "loss": 0.5232, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.7616559485530546, |
| "grad_norm": 4.25010398827249, |
| "learning_rate": 1.6383895601564047e-06, |
| "loss": 0.6047, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.7636655948553055, |
| "grad_norm": 4.312065419247702, |
| "learning_rate": 1.6125056462376065e-06, |
| "loss": 0.5323, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.7636655948553055, |
| "eval_cooking_sharegpt_test_loss": 0.534070611000061, |
| "eval_cooking_sharegpt_test_runtime": 29.1362, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.864, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.7656752411575563, |
| "grad_norm": 4.311264813952778, |
| "learning_rate": 1.586788460251636e-06, |
| "loss": 0.4919, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.7676848874598071, |
| "grad_norm": 3.9822945127982186, |
| "learning_rate": 1.561239267963926e-06, |
| "loss": 0.4988, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.7696945337620579, |
| "grad_norm": 3.5862382118282814, |
| "learning_rate": 1.5358593268714866e-06, |
| "loss": 0.556, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.7717041800643086, |
| "grad_norm": 4.65685750488049, |
| "learning_rate": 1.5106498861410101e-06, |
| "loss": 0.5705, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.7737138263665595, |
| "grad_norm": 4.93082892843028, |
| "learning_rate": 1.4856121865473855e-06, |
| "loss": 0.5442, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.7757234726688103, |
| "grad_norm": 4.115014663671977, |
| "learning_rate": 1.460747460412637e-06, |
| "loss": 0.5497, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.7777331189710611, |
| "grad_norm": 3.5438779332216965, |
| "learning_rate": 1.4360569315452682e-06, |
| "loss": 0.4903, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.7797427652733119, |
| "grad_norm": 4.031371623093601, |
| "learning_rate": 1.4115418151800215e-06, |
| "loss": 0.5644, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.7817524115755627, |
| "grad_norm": 3.54725412927624, |
| "learning_rate": 1.3872033179180767e-06, |
| "loss": 0.5178, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.7837620578778135, |
| "grad_norm": 4.445227207707671, |
| "learning_rate": 1.363042637667652e-06, |
| "loss": 0.5802, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.7837620578778135, |
| "eval_cooking_sharegpt_test_loss": 0.5310518145561218, |
| "eval_cooking_sharegpt_test_runtime": 29.1634, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.858, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.7857717041800643, |
| "grad_norm": 5.019716908374287, |
| "learning_rate": 1.339060963585056e-06, |
| "loss": 0.5734, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.7877813504823151, |
| "grad_norm": 3.8522224862461147, |
| "learning_rate": 1.3152594760161513e-06, |
| "loss": 0.4906, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.7897909967845659, |
| "grad_norm": 3.6624257408254457, |
| "learning_rate": 1.2916393464382632e-06, |
| "loss": 0.4873, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.7918006430868167, |
| "grad_norm": 5.220019531776816, |
| "learning_rate": 1.2682017374025158e-06, |
| "loss": 0.5863, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.7938102893890675, |
| "grad_norm": 3.978993529598662, |
| "learning_rate": 1.2449478024766205e-06, |
| "loss": 0.4623, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.7958199356913184, |
| "grad_norm": 4.0726044586510675, |
| "learning_rate": 1.2218786861880937e-06, |
| "loss": 0.496, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.7978295819935691, |
| "grad_norm": 3.8206385965353222, |
| "learning_rate": 1.1989955239679279e-06, |
| "loss": 0.5187, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.7998392282958199, |
| "grad_norm": 3.9512193648944653, |
| "learning_rate": 1.1762994420947016e-06, |
| "loss": 0.4982, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.8018488745980707, |
| "grad_norm": 3.8053938839616412, |
| "learning_rate": 1.153791557639153e-06, |
| "loss": 0.5194, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.8038585209003215, |
| "grad_norm": 3.6251196777845616, |
| "learning_rate": 1.1314729784091937e-06, |
| "loss": 0.5537, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.8038585209003215, |
| "eval_cooking_sharegpt_test_loss": 0.5269535779953003, |
| "eval_cooking_sharegpt_test_runtime": 29.147, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.862, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.8058681672025724, |
| "grad_norm": 4.043004314873042, |
| "learning_rate": 1.1093448028953886e-06, |
| "loss": 0.4801, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.8078778135048231, |
| "grad_norm": 3.864239177756637, |
| "learning_rate": 1.0874081202168806e-06, |
| "loss": 0.4985, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.809887459807074, |
| "grad_norm": 3.822597851846882, |
| "learning_rate": 1.065664010067799e-06, |
| "loss": 0.4991, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.8118971061093248, |
| "grad_norm": 3.857276347852937, |
| "learning_rate": 1.0441135426641074e-06, |
| "loss": 0.4637, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.8139067524115756, |
| "grad_norm": 4.4337279604757, |
| "learning_rate": 1.0227577786909332e-06, |
| "loss": 0.5738, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.8159163987138264, |
| "grad_norm": 3.6706223677728254, |
| "learning_rate": 1.0015977692503632e-06, |
| "loss": 0.5243, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.8179260450160771, |
| "grad_norm": 4.517936325301277, |
| "learning_rate": 9.806345558097053e-07, |
| "loss": 0.5106, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.819935691318328, |
| "grad_norm": 4.230476927090911, |
| "learning_rate": 9.59869170150236e-07, |
| "loss": 0.5789, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.8219453376205788, |
| "grad_norm": 4.043867851008032, |
| "learning_rate": 9.393026343164114e-07, |
| "loss": 0.5238, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.8239549839228296, |
| "grad_norm": 3.433756831229721, |
| "learning_rate": 9.189359605655668e-07, |
| "loss": 0.4972, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.8239549839228296, |
| "eval_cooking_sharegpt_test_loss": 0.5249894261360168, |
| "eval_cooking_sharegpt_test_runtime": 29.1006, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.873, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.344, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.8259646302250804, |
| "grad_norm": 4.6112587559432, |
| "learning_rate": 8.987701513180907e-07, |
| "loss": 0.5356, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.8279742765273312, |
| "grad_norm": 4.050624512976509, |
| "learning_rate": 8.788061991080937e-07, |
| "loss": 0.519, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.829983922829582, |
| "grad_norm": 3.9107977100160123, |
| "learning_rate": 8.590450865345512e-07, |
| "loss": 0.5988, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.8319935691318328, |
| "grad_norm": 4.197001300269493, |
| "learning_rate": 8.394877862129446e-07, |
| "loss": 0.4833, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.8340032154340836, |
| "grad_norm": 4.289046142719884, |
| "learning_rate": 8.201352607273877e-07, |
| "loss": 0.5961, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.8360128617363344, |
| "grad_norm": 4.718556012278376, |
| "learning_rate": 8.009884625832531e-07, |
| "loss": 0.5824, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.8380225080385852, |
| "grad_norm": 4.096609919204836, |
| "learning_rate": 7.82048334160288e-07, |
| "loss": 0.5427, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.840032154340836, |
| "grad_norm": 4.906614424028583, |
| "learning_rate": 7.633158076662356e-07, |
| "loss": 0.6349, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.8420418006430869, |
| "grad_norm": 4.694926277231841, |
| "learning_rate": 7.447918050909453e-07, |
| "loss": 0.5806, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.8440514469453376, |
| "grad_norm": 3.9943595352709735, |
| "learning_rate": 7.264772381610041e-07, |
| "loss": 0.5315, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.8440514469453376, |
| "eval_cooking_sharegpt_test_loss": 0.5224404335021973, |
| "eval_cooking_sharegpt_test_runtime": 29.1427, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.863, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.8460610932475884, |
| "grad_norm": 3.8949136566294995, |
| "learning_rate": 7.083730082948526e-07, |
| "loss": 0.4789, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.8480707395498392, |
| "grad_norm": 4.010454819797958, |
| "learning_rate": 6.904800065584255e-07, |
| "loss": 0.4783, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.85008038585209, |
| "grad_norm": 4.746242279274611, |
| "learning_rate": 6.727991136212931e-07, |
| "loss": 0.546, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.8520900321543409, |
| "grad_norm": 4.283372719633206, |
| "learning_rate": 6.553311997133111e-07, |
| "loss": 0.5003, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.8540996784565916, |
| "grad_norm": 3.560283237601326, |
| "learning_rate": 6.380771245817957e-07, |
| "loss": 0.4842, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.8561093247588425, |
| "grad_norm": 3.79934997015618, |
| "learning_rate": 6.210377374492049e-07, |
| "loss": 0.4678, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.8581189710610932, |
| "grad_norm": 4.1885519690593185, |
| "learning_rate": 6.042138769713413e-07, |
| "loss": 0.5096, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.860128617363344, |
| "grad_norm": 3.97970854328499, |
| "learning_rate": 5.876063711960706e-07, |
| "loss": 0.4941, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.8621382636655949, |
| "grad_norm": 4.394448792908495, |
| "learning_rate": 5.712160375225756e-07, |
| "loss": 0.5573, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.8641479099678456, |
| "grad_norm": 4.127283021998723, |
| "learning_rate": 5.55043682661115e-07, |
| "loss": 0.5165, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.8641479099678456, |
| "eval_cooking_sharegpt_test_loss": 0.5204899311065674, |
| "eval_cooking_sharegpt_test_runtime": 29.1702, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.856, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.8661575562700965, |
| "grad_norm": 3.69665008473374, |
| "learning_rate": 5.39090102593326e-07, |
| "loss": 0.5317, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.8681672025723473, |
| "grad_norm": 3.5634773461727223, |
| "learning_rate": 5.233560825330387e-07, |
| "loss": 0.5341, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.8701768488745981, |
| "grad_norm": 3.450288752072914, |
| "learning_rate": 5.0784239688764e-07, |
| "loss": 0.4069, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.8721864951768489, |
| "grad_norm": 4.428896151960056, |
| "learning_rate": 4.925498092199449e-07, |
| "loss": 0.5154, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.8741961414790996, |
| "grad_norm": 3.6865145625805633, |
| "learning_rate": 4.774790722106309e-07, |
| "loss": 0.5408, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.8762057877813505, |
| "grad_norm": 3.8579452526033293, |
| "learning_rate": 4.6263092762117546e-07, |
| "loss": 0.5051, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.8782154340836013, |
| "grad_norm": 3.649942640889945, |
| "learning_rate": 4.480061062573604e-07, |
| "loss": 0.4879, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.8802250803858521, |
| "grad_norm": 4.4196873140991935, |
| "learning_rate": 4.336053279332941e-07, |
| "loss": 0.5404, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.8822347266881029, |
| "grad_norm": 3.8686685069961064, |
| "learning_rate": 4.1942930143599014e-07, |
| "loss": 0.4976, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.8842443729903537, |
| "grad_norm": 4.070283502525038, |
| "learning_rate": 4.0547872449047674e-07, |
| "loss": 0.5689, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.8842443729903537, |
| "eval_cooking_sharegpt_test_loss": 0.5188571810722351, |
| "eval_cooking_sharegpt_test_runtime": 29.1495, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.861, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.8862540192926045, |
| "grad_norm": 3.619269487786876, |
| "learning_rate": 3.917542837254562e-07, |
| "loss": 0.5528, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.8882636655948553, |
| "grad_norm": 3.6657937873825137, |
| "learning_rate": 3.7825665463951224e-07, |
| "loss": 0.541, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.8902733118971061, |
| "grad_norm": 3.4204803639440526, |
| "learning_rate": 3.649865015678622e-07, |
| "loss": 0.4743, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.8922829581993569, |
| "grad_norm": 4.380202502107755, |
| "learning_rate": 3.5194447764965887e-07, |
| "loss": 0.546, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.8942926045016077, |
| "grad_norm": 4.402021876559463, |
| "learning_rate": 3.391312247958417e-07, |
| "loss": 0.5446, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.8963022508038585, |
| "grad_norm": 4.747626450695015, |
| "learning_rate": 3.265473736575475e-07, |
| "loss": 0.5655, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.8983118971061094, |
| "grad_norm": 3.68468614908709, |
| "learning_rate": 3.141935435950644e-07, |
| "loss": 0.6147, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.9003215434083601, |
| "grad_norm": 3.535700858658026, |
| "learning_rate": 3.0207034264735756e-07, |
| "loss": 0.5006, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.9023311897106109, |
| "grad_norm": 3.848416834162935, |
| "learning_rate": 2.901783675021297e-07, |
| "loss": 0.5161, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.9043408360128617, |
| "grad_norm": 3.730188112771092, |
| "learning_rate": 2.785182034664641e-07, |
| "loss": 0.5191, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.9043408360128617, |
| "eval_cooking_sharegpt_test_loss": 0.5172947645187378, |
| "eval_cooking_sharegpt_test_runtime": 29.1343, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.865, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.9063504823151125, |
| "grad_norm": 4.611573430946674, |
| "learning_rate": 2.670904244380068e-07, |
| "loss": 0.5719, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.9083601286173634, |
| "grad_norm": 3.4190333394817625, |
| "learning_rate": 2.5589559287673205e-07, |
| "loss": 0.5415, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.9103697749196141, |
| "grad_norm": 3.7043226879021027, |
| "learning_rate": 2.4493425977724585e-07, |
| "loss": 0.5431, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.912379421221865, |
| "grad_norm": 3.5791803584446877, |
| "learning_rate": 2.3420696464167614e-07, |
| "loss": 0.5563, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.9143890675241158, |
| "grad_norm": 3.6102349926375292, |
| "learning_rate": 2.237142354531141e-07, |
| "loss": 0.5127, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.9163987138263665, |
| "grad_norm": 4.04764189714931, |
| "learning_rate": 2.1345658864962982e-07, |
| "loss": 0.5183, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.9184083601286174, |
| "grad_norm": 4.12380254309591, |
| "learning_rate": 2.0343452909885487e-07, |
| "loss": 0.5244, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.9204180064308681, |
| "grad_norm": 2.9917916489817657, |
| "learning_rate": 1.9364855007313e-07, |
| "loss": 0.6096, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.922427652733119, |
| "grad_norm": 4.666115781878385, |
| "learning_rate": 1.84099133225229e-07, |
| "loss": 0.5603, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.9244372990353698, |
| "grad_norm": 4.1461620270350545, |
| "learning_rate": 1.747867485646537e-07, |
| "loss": 0.4639, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.9244372990353698, |
| "eval_cooking_sharegpt_test_loss": 0.5164940357208252, |
| "eval_cooking_sharegpt_test_runtime": 29.1503, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.861, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.9264469453376206, |
| "grad_norm": 3.424368627441663, |
| "learning_rate": 1.6571185443449934e-07, |
| "loss": 0.4837, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.9284565916398714, |
| "grad_norm": 3.3861373326786874, |
| "learning_rate": 1.5687489748889228e-07, |
| "loss": 0.5225, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.9304662379421221, |
| "grad_norm": 4.0432557223167365, |
| "learning_rate": 1.482763126710135e-07, |
| "loss": 0.505, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.932475884244373, |
| "grad_norm": 4.149053064263076, |
| "learning_rate": 1.3991652319168436e-07, |
| "loss": 0.5735, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.9344855305466238, |
| "grad_norm": 4.436444939575187, |
| "learning_rate": 1.3179594050854227e-07, |
| "loss": 0.572, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.9364951768488746, |
| "grad_norm": 4.509769479123379, |
| "learning_rate": 1.239149643057841e-07, |
| "loss": 0.4864, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.9385048231511254, |
| "grad_norm": 4.668035022637942, |
| "learning_rate": 1.1627398247449906e-07, |
| "loss": 0.5206, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.9405144694533762, |
| "grad_norm": 3.9506527567648697, |
| "learning_rate": 1.08873371093573e-07, |
| "loss": 0.6029, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.942524115755627, |
| "grad_norm": 4.230839087401455, |
| "learning_rate": 1.017134944111814e-07, |
| "loss": 0.5838, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.9445337620578779, |
| "grad_norm": 4.151545046693275, |
| "learning_rate": 9.479470482686048e-08, |
| "loss": 0.487, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.9445337620578779, |
| "eval_cooking_sharegpt_test_loss": 0.5156686305999756, |
| "eval_cooking_sharegpt_test_runtime": 29.1666, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.857, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.9465434083601286, |
| "grad_norm": 4.142210605255353, |
| "learning_rate": 8.811734287416274e-08, |
| "loss": 0.5335, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.9485530546623794, |
| "grad_norm": 3.850811332598397, |
| "learning_rate": 8.168173720389472e-08, |
| "loss": 0.5186, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.9505627009646302, |
| "grad_norm": 3.597795214532193, |
| "learning_rate": 7.548820456794448e-08, |
| "loss": 0.4817, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.952572347266881, |
| "grad_norm": 3.8186393981188114, |
| "learning_rate": 6.953704980368958e-08, |
| "loss": 0.4579, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.9545819935691319, |
| "grad_norm": 4.174150444604705, |
| "learning_rate": 6.382856581899133e-08, |
| "loss": 0.5271, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.9565916398713826, |
| "grad_norm": 4.2074795390285225, |
| "learning_rate": 5.8363033577784055e-08, |
| "loss": 0.5917, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.9586012861736335, |
| "grad_norm": 3.9305248224525404, |
| "learning_rate": 5.314072208623844e-08, |
| "loss": 0.4585, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.9606109324758842, |
| "grad_norm": 3.9808292965527037, |
| "learning_rate": 4.81618883795304e-08, |
| "loss": 0.4707, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.962620578778135, |
| "grad_norm": 3.753591113764896, |
| "learning_rate": 4.342677750918178e-08, |
| "loss": 0.5422, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.9646302250803859, |
| "grad_norm": 3.6932078456444373, |
| "learning_rate": 3.8935622531006136e-08, |
| "loss": 0.4571, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.9646302250803859, |
| "eval_cooking_sharegpt_test_loss": 0.5150659680366516, |
| "eval_cooking_sharegpt_test_runtime": 29.1762, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.855, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.9666398713826366, |
| "grad_norm": 3.7902631860251472, |
| "learning_rate": 3.468864449363119e-08, |
| "loss": 0.472, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.9686495176848875, |
| "grad_norm": 4.873378997207432, |
| "learning_rate": 3.0686052427626454e-08, |
| "loss": 0.5863, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.9706591639871383, |
| "grad_norm": 3.41955309662197, |
| "learning_rate": 2.692804333520982e-08, |
| "loss": 0.5606, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.9726688102893891, |
| "grad_norm": 3.9261487092055645, |
| "learning_rate": 2.341480218055303e-08, |
| "loss": 0.5137, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.9746784565916399, |
| "grad_norm": 3.908466740159429, |
| "learning_rate": 2.014650188067735e-08, |
| "loss": 0.5444, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.9766881028938906, |
| "grad_norm": 4.095217894179939, |
| "learning_rate": 1.7123303296944226e-08, |
| "loss": 0.5302, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.9786977491961415, |
| "grad_norm": 3.3767195966017485, |
| "learning_rate": 1.4345355227137203e-08, |
| "loss": 0.5031, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.9807073954983923, |
| "grad_norm": 3.8928483675929337, |
| "learning_rate": 1.1812794398137762e-08, |
| "loss": 0.5233, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.9827170418006431, |
| "grad_norm": 4.410721124785433, |
| "learning_rate": 9.525745459195712e-09, |
| "loss": 0.5351, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.9847266881028939, |
| "grad_norm": 4.367071424030973, |
| "learning_rate": 7.484320975795766e-09, |
| "loss": 0.45, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.9847266881028939, |
| "eval_cooking_sharegpt_test_loss": 0.5149813294410706, |
| "eval_cooking_sharegpt_test_runtime": 29.1432, |
| "eval_cooking_sharegpt_test_samples_per_second": 6.863, |
| "eval_cooking_sharegpt_test_steps_per_second": 0.343, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.9867363344051447, |
| "grad_norm": 4.348603146645124, |
| "learning_rate": 5.688621424115304e-09, |
| "loss": 0.4929, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.9887459807073955, |
| "grad_norm": 3.4464338928700866, |
| "learning_rate": 4.1387351860799894e-09, |
| "loss": 0.5602, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.9907556270096463, |
| "grad_norm": 4.117129096266541, |
| "learning_rate": 2.8347385450133715e-09, |
| "loss": 0.4746, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.9927652733118971, |
| "grad_norm": 3.7130286969995274, |
| "learning_rate": 1.7766956818832116e-09, |
| "loss": 0.4795, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.9947749196141479, |
| "grad_norm": 3.8454728155367164, |
| "learning_rate": 9.646586721412388e-10, |
| "loss": 0.5535, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.9967845659163987, |
| "grad_norm": 3.694656061423026, |
| "learning_rate": 3.986674831607529e-10, |
| "loss": 0.5087, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.9987942122186495, |
| "grad_norm": 4.211180713946689, |
| "learning_rate": 7.87499722693097e-11, |
| "loss": 0.4456, |
| "step": 2485 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 2488, |
| "total_flos": 19663349465088.0, |
| "train_loss": 0.6474186228019249, |
| "train_runtime": 24049.0144, |
| "train_samples_per_second": 0.827, |
| "train_steps_per_second": 0.103 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 2488, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 5000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 19663349465088.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|